diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e2e32250..fb8c232e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -13,11 +13,11 @@ jobs:
       - name: Setup MPI
         uses: mpi4py/setup-mpi@v1
         with:
-          mpi: openmpi 
+          mpi: openmpi
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -r requirements.txt 
+          pip install -r requirements.txt
           pip install -r dev/dev.txt
       - name: pylint
         run: |
@@ -32,7 +32,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [ '3.8', '3.9', '3.10' ]
+        python-version: [ '3.10', '3.11', '3.12' ]
         mpi:
           - openmpi
     runs-on:  ubuntu-latest
@@ -53,7 +53,7 @@ jobs:
         pip install -r dev/dev.txt
         pip install pyblock
         # https://github.com/JoonhoLee-Group/ipie/issues/278
-        pip install "pyscf<=2.3.0"
+        pip install pyscf
     - name: Install package
       run: |
         # HACK FOR LEGACY CODE!
@@ -81,10 +81,21 @@ jobs:
           python -m pip install --upgrade pip
           pip install -r requirements.txt
           pip install pytest-xdist
+      - name: Install package
+        run: |
+          python -m pip install -e .
       - name: No mpi4py
         run: |
           python -c "from ipie.config import MPI; assert \"FakeComm\" in str(MPI.COMM_WORLD)"
           python -m pytest -n=auto
+      - name: wicks helper
+        run: |
+          sudo apt-get -y update && sudo apt-get -y install cmake
+          cc --version
+          root=$(pwd)
+          cd ipie/lib/wicks && mkdir build && cd build && cmake .. && make VERBOSE=1 && cd $root
+          # pytest ipie/trial_wavefunction/ -m wicks
+          # pytest ipie/lib/wicks -m wicks
   integration:
     strategy:
       fail-fast: false
@@ -97,11 +108,11 @@ jobs:
       - name: Setup MPI
         uses: mpi4py/setup-mpi@v1
         with:
-          mpi: openmpi 
+          mpi: openmpi
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -r requirements.txt 
+          pip install -r requirements.txt
           pip install -r dev/dev.txt
       - name: Install package
         run: |
@@ -120,14 +131,14 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -r requirements.txt 
+          pip install -r requirements.txt
           pip install pyblock
           # https://github.com/JoonhoLee-Group/ipie/issues/278
-          pip install "pyscf<=2.3.0"
+          pip install pyscf fqe
       - name: Install package
         run: |
           python -m pip install -e .
       - name: Test Examples
         timeout-minutes: 10
         run: |
-          python dev/run_tests.py --examples
\ No newline at end of file
+          python dev/run_tests.py --examples
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index ec4024c0..0e8b2601 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -1,4 +1,4 @@
-name: Publish ipie 
+name: Publish ipie
 
 on:
   push:
@@ -37,7 +37,7 @@ jobs:
         name: python-package-distributions
         path: dist/
     - name: Test the wheel
-      run: | 
+      run: |
         ls dist
         wheel=$(find dist/*whl)
         echo $wheel
@@ -48,9 +48,9 @@ jobs:
     - name: Setup MPI
       uses: mpi4py/setup-mpi@v1
       with:
-        mpi: openmpi 
+        mpi: openmpi
     - name: Test the wheel
-      run: | 
+      run: |
         pip install mpi4py
         python -c "import mpi4py"
         python -c "from ipie.config import MPI; assert \"mpi4py\" in str(MPI.COMM_WORLD)"
diff --git a/.gitignore b/.gitignore
index 19ea23c8..3cdec5a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -108,4 +108,7 @@ ipie/qmc/tests/reference_data/**/*.h5
 *.txt
 *wheels*
 *FCIDUMP*
-*out*
\ No newline at end of file
+*out*
+
+*.code-workspace
+
diff --git a/.pylintrc b/.pylintrc
index 9ba5f042..59d6545e 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -23,8 +23,8 @@ output-format=colorized
 disable=all
 enable=
       no-member,
-    # renable once classes are tidied
-;     attribute-defined-outside-init,
+      # renable once classes are tidied
+      # attribute-defined-outside-init,
       consider-using-f-string,
       useless-object-inheritance,
       unused-variable,
@@ -68,4 +68,4 @@ enable=
       no-else-return,
 # https://github.com/pylint-dev/pylint/issues/2178
 # ignore urls
-ignore-long-lines=^\s*(# )?<?https?://\S+>?$|^\s*(\w*\s*=\s*)?(\"|\').*(\"|\'),?\s*$
\ No newline at end of file
+ignore-long-lines=^\s*(# )?<?https?://\S+>?$|^\s*(\w*\s*=\s*)?(\"|\').*(\"|\'),?\s*$
diff --git a/README.rst b/README.rst
index 7f2c9982..7bb76660 100644
--- a/README.rst
+++ b/README.rst
@@ -22,6 +22,7 @@ ipie inherits a lot of QMC features from pauxy.
     :target: https://arxiv.org/abs/2209.04015
 
 Copyright by Joonho Lee (joonholee@g.harvard.edu)
+Our first release paper can be found at https://pubs.acs.org/doi/10.1021/acs.jctc.2c00934
 
 Features
 --------
diff --git a/dev/run_tests.py b/dev/run_tests.py
old mode 100644
new mode 100755
index 710fb22a..03dd61ff
--- a/dev/run_tests.py
+++ b/dev/run_tests.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
 import argparse
 import glob
 import os
diff --git a/docs/source/conf.py b/docs/source/conf.py
index d4ae96e4..f7a51893 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -24,7 +24,7 @@
 
 import mock
 
-MOCK_MODULES = ["h5py","mpi4py"]
+MOCK_MODULES = ["h5py", "mpi4py"]
 for mod_name in MOCK_MODULES:
     sys.modules[mod_name] = mock.Mock()
 autodoc_mock_imports = MOCK_MODULES
diff --git a/docs/source/dev/index.md b/docs/source/dev/index.md
index 08b4669a..f1920d7d 100644
--- a/docs/source/dev/index.md
+++ b/docs/source/dev/index.md
@@ -65,3 +65,13 @@ export IPIE_USE_GPU=1; mpirun -np 1 pytest -m gpu -sv
 
 -   Note if running CPU test afterwards it may be necessary to clear the
     environment variable!
+
+
+## Releasing a package
+
+1. Change the version from 'X.Y.Z.dev0' to 'X.Y.Z'.
+2. Merge develop into main. Do not squash the merge; keeping the individual commits enables sensible release notes.
+3. Push the tag 'vX.Y.Z' (no dev0).
+4. Check the GitHub Actions runs and ensure the build and publish steps succeed.
+5. Create a new branch and bump the version to 'X.(Y+1).Z.dev0'.
+6. Send a PR for this branch into develop.
diff --git a/examples/01-simple/scf.py b/examples/01-simple/scf.py
index a539d78f..66067d56 100644
--- a/examples/01-simple/scf.py
+++ b/examples/01-simple/scf.py
@@ -1,4 +1,3 @@
-
 # Copyright 2022 The ipie Developers. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/03-custom_observable/run_afqmc.py b/examples/03-custom_observable/run_afqmc.py
index a4100b36..e97e455a 100644
--- a/examples/03-custom_observable/run_afqmc.py
+++ b/examples/03-custom_observable/run_afqmc.py
@@ -15,6 +15,8 @@
 # Author: Fionn Malone <fmalone@google.com>
 #
 
+from typing import Dict
+
 import numpy as np
 from pyscf import gto, scf
 
@@ -86,11 +88,11 @@ def __init__(self, ham):
         # Must specify that we're dealing with array valued estimator
         self.scalar_estimator = False
 
-    def compute_estimator(self, system, walker_batch, hamiltonian, trial_wavefunction):
-        trial_wavefunction.calc_greens_function(walker_batch, build_full=True)
-        numer = np.einsum("w,wii->i", walker_batch.weight, walker_batch.Ga + walker_batch.Gb)
+    def compute_estimator(self, system=None, walkers=None, hamiltonian=None, trial=None):
+        trial.calc_greens_function(walkers, build_full=True)
+        numer = np.einsum("w,wii->i", walkers.weight, walkers.Ga + walkers.Gb)
         self["DiagGNumer"] = numer
-        self["DiagGDenom"] = sum(walker_batch.weight)
+        self["DiagGDenom"] = sum(walkers.weight)
 
 
 afqmc = build_afqmc_driver(comm, nelec=mol.nelec)
@@ -127,25 +129,28 @@ def __init__(self, ham):
         # Must specify that we're dealing with array valued estimator
         self.scalar_estimator = False
 
-    def compute_estimator(self, system, walker_batch, hamiltonian, trial_wavefunction):
-        trial_wavefunction.calc_greens_function(walker_batch, build_full=True)
+    def compute_estimator(self, system=None, walkers=None, hamiltonian=None, trial=None):
+        trial.calc_greens_function(walkers, build_full=True)
         numer = np.array(
             [
-                np.einsum("w,wij->ij", walker_batch.weight, walker_batch.Ga),
-                np.einsum("w,wij->ij", walker_batch.weight, walker_batch.Gb),
+                np.einsum("w,wij->ij", walkers.weight, walkers.Ga),
+                np.einsum("w,wij->ij", walkers.weight, walkers.Gb),
             ]
         )
 
         # For multidimensional arrays we must flatten the data
         self["GNumer"] = numer.ravel()
-        self["GDenom"] = sum(walker_batch.weight)
+        self["GDenom"] = sum(walkers.weight)
 
 
 afqmc = build_afqmc_driver(comm, nelec=mol.nelec)
 # Let us override the number of blocks to keep it short
 afqmc.params.num_blocks = 20
 # We can now add this to the estimator handler object in the afqmc driver
-add_est = {"diagG": Diagonal1RDM(ham=afqmc.hamiltonian), "1RDM": Mixed1RDM(ham=afqmc.hamiltonian)}
+add_est: Dict[str, EstimatorBase] = {
+    "diagG": Diagonal1RDM(ham=afqmc.hamiltonian),
+    "1RDM": Mixed1RDM(ham=afqmc.hamiltonian),
+}
 afqmc.run(additional_estimators=add_est)
 # We can extract the qmc data as as a pandas data frame like so
 from ipie.analysis.extraction import extract_observable
diff --git a/examples/05-frozen_core/scf.py b/examples/05-frozen_core/scf.py
index 9e37e58c..a0bba0d5 100644
--- a/examples/05-frozen_core/scf.py
+++ b/examples/05-frozen_core/scf.py
@@ -1,4 +1,3 @@
-
 # Copyright 2022 The ipie Developers. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/examples/06-gpu/chunked_chol.py b/examples/06-gpu/chunked_chol.py
new file mode 100644
index 00000000..883e2eef
--- /dev/null
+++ b/examples/06-gpu/chunked_chol.py
@@ -0,0 +1,23 @@
+from pyscf import cc, gto, scf
+from ipie.utils.mpi import make_splits_displacements
+import h5py
+import numpy as np
+import gc 
+
+
+mol = gto.M(
+    atom=[("H", 1.6 * i, 0, 0) for i in range(0, 4)],
+    basis="sto-6g",
+    verbose=4,
+    unit="Bohr",
+)
+mf = scf.UHF(mol)
+mf.chkfile = "scf.chk"
+mf.kernel()
+
+from ipie.utils.from_pyscf import gen_ipie_input_from_pyscf_chk
+gen_ipie_input_from_pyscf_chk(mf.chkfile, verbose=0)
+
+
+from ipie.utils.chunk_large_chol import split_cholesky
+split_cholesky('hamiltonian.h5', 4) # split the cholesky to 4 subfiles
\ No newline at end of file
diff --git a/examples/06-gpu/run_afqmc_chunked.py b/examples/06-gpu/run_afqmc_chunked.py
new file mode 100644
index 00000000..7f2a2ada
--- /dev/null
+++ b/examples/06-gpu/run_afqmc_chunked.py
@@ -0,0 +1,117 @@
+import sys
+import h5py
+import numpy
+
+from ipie.hamiltonians.generic_chunked import GenericRealCholChunked as HamGeneric
+from ipie.qmc.afqmc import AFQMC
+from ipie.systems.generic import Generic
+from ipie.trial_wavefunction.single_det import SingleDet
+from ipie.utils.mpi import MPIHandler
+
+import os
+from ipie.utils.backend import arraylib as xp
+from pyscf import gto
+
+try:
+    import cupy
+    from mpi4py import MPI
+except ImportError:
+    sys.exit(0)
+
+from chunked_chol import *
+
+mol = gto.M(
+    atom=[("H", 1.6 * i, 0, 0) for i in range(0, 4)],
+    basis="sto-6g",
+    verbose=4,
+    unit="Bohr",
+)
+
+from ipie.config import config
+config.update_option("use_gpu", True)
+
+gpu_number_per_node = 4
+nmembers = 4
+gpu_id = MPI.COMM_WORLD.rank % gpu_number_per_node
+xp.cuda.Device(gpu_id).use()
+
+comm = MPI.COMM_WORLD
+num_walkers = 1224 // comm.size
+nsteps = 25
+nblocks = 100
+timestep = 0.005
+rng_seed = None
+
+
+with h5py.File("hamiltonian.h5") as fa:
+    e0 = fa["e0"][()]
+    hcore = fa["hcore"][()]
+
+rank = comm.Get_rank()
+size = comm.Get_size()
+srank = rank % nmembers
+
+from ipie.utils.mpi import MPIHandler, make_splits_displacements
+handler = MPIHandler(nmembers=nmembers)
+
+from ipie.utils.pack_numba import pack_cholesky
+
+num_basis = hcore.shape[-1]
+with h5py.File(f"chol_{srank}.h5") as fa:
+    chol_chunk = fa["chol"][()]
+
+chunked_chols = chol_chunk.shape[-1]
+num_chol = handler.scomm.allreduce(chunked_chols, op=MPI.SUM)
+
+chol_chunk_view = chol_chunk.reshape((num_basis, num_basis, -1))
+cp_shape = (num_basis * (num_basis + 1) // 2, chol_chunk_view.shape[-1])
+chol_packed_chunk = numpy.zeros(cp_shape, dtype=chol_chunk_view.dtype)
+sym_idx = numpy.triu_indices(num_basis)
+pack_cholesky(sym_idx[0], sym_idx[1], chol_packed_chunk, chol_chunk_view)
+del chol_chunk_view
+
+split_size = make_splits_displacements(num_chol, nmembers)[0]
+assert chunked_chols == split_size[srank]
+
+with h5py.File("wavefunction.h5") as fa:
+    phi0a = fa["phi0_alpha"][()]
+    psiT = fa["psi_T_alpha"][()]
+
+
+num_basis = hcore.shape[-1]
+mol_nelec = mol.nelec
+system = Generic(nelec=mol_nelec)
+ham = HamGeneric(
+    numpy.array([hcore, hcore]),
+    None,
+    chol_chunk,
+    chol_packed_chunk,
+    e0, handler
+)
+ham.nchol = num_chol
+ham.handler = handler
+
+trial = SingleDet(numpy.hstack([psiT, psiT]), mol_nelec, num_basis, handler)
+trial.build()
+trial.half_rotate(ham)
+
+from ipie.walkers.uhf_walkers import UHFWalkers
+from ipie.utils.mpi import MPIHandler
+walkers = UHFWalkers(numpy.hstack([phi0a, phi0a]), system.nup, system.ndown, ham.nbasis, num_walkers, mpi_handler=handler)
+walkers.build(trial)
+
+afqmc = AFQMC.build(
+    mol_nelec,
+    ham,
+    trial,
+    walkers,
+    num_walkers,
+    rng_seed,
+    nsteps,
+    nblocks,
+    timestep,
+    mpi_handler=handler)
+
+
+afqmc.run()
+afqmc.finalise(verbose=True)
diff --git a/examples/07-custom_trial/run_afqmc.py b/examples/07-custom_trial/run_afqmc.py
index 58b34375..48379be0 100644
--- a/examples/07-custom_trial/run_afqmc.py
+++ b/examples/07-custom_trial/run_afqmc.py
@@ -91,7 +91,7 @@ def __init__(
             trial=trial,
         )
 
-    def compute_estimator(self, system, walkers, hamiltonian, trial, istep=1):
+    def compute_estimator(self, system, walkers, hamiltonian, trial):
         trial.calc_greens_function(walkers)
         # Need to be able to dispatch here
         energy = local_energy_batch(system, hamiltonian, walkers, trial)
diff --git a/examples/08-custom_walker/run_afqmc.py b/examples/08-custom_walker/run_afqmc.py
index abfd110d..4dd545da 100644
--- a/examples/08-custom_walker/run_afqmc.py
+++ b/examples/08-custom_walker/run_afqmc.py
@@ -94,7 +94,7 @@ def __init__(
             trial=trial,
         )
 
-    def compute_estimator(self, system, walkers, hamiltonian, trial, istep=1):
+    def compute_estimator(self, system, walkers, hamiltonian, trial):
         trial.calc_greens_function(walkers)
         # Need to be able to dispatch here
         energy = local_energy_batch(system, hamiltonian, walkers, trial)
@@ -199,6 +199,7 @@ def reortho(self):
     trial,
     walkers,
     propagator,
+    mpi_handler,
     params,
 )
 estimator = NoisyEnergyEstimator(system=system, ham=ham, trial=trial)
diff --git a/examples/13-free_projection/fp_afqmc.ipynb b/examples/13-free_projection/fp_afqmc.ipynb
new file mode 100644
index 00000000..12bd8aeb
--- /dev/null
+++ b/examples/13-free_projection/fp_afqmc.ipynb
@@ -0,0 +1,180 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from pyscf import fci, gto, scf\n",
+    "\n",
+    "np.set_printoptions(precision=5, suppress=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "r = 1.6\n",
+    "nH = 10\n",
+    "# geom = f\"H {-3*r/2} 0 0; H {-r/2} 0 0; H {r/2} 0 0; H {3*r/2} 0 0\"\n",
+    "geom = \"\"\n",
+    "for i in range(nH):\n",
+    "    geom += \"H 0 0 %g\\n\" % (i * r)\n",
+    "mol = gto.M(atom=geom, basis=\"sto-6g\", verbose=3, unit=\"bohr\")\n",
+    "\n",
+    "mf = scf.RHF(mol)\n",
+    "mf.kernel()\n",
+    "\n",
+    "umf = scf.UHF(mol)\n",
+    "umf.kernel()\n",
+    "mo1 = umf.stability(external=True)[0]\n",
+    "umf = umf.newton().run(mo1, umf.mo_occ)\n",
+    "mo1 = umf.stability(external=True)[0]\n",
+    "umf = umf.newton().run(mo1, umf.mo_occ)\n",
+    "\n",
+    "# fci\n",
+    "cisolver = fci.FCI(mol, mf.mo_coeff)\n",
+    "e, ci = cisolver.kernel()\n",
+    "print(\"FCI energy: \", e)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Free projection AFQMC\n",
+    "\n",
+    "Evaluates the quantity\n",
+    "\n",
+    "$$ E(\\tau) = \\frac{\\langle \\Psi_l | H e^{-\\tau H} | \\Psi_r \\rangle}{\\langle \\Psi_l | e^{-\\tau H} | \\Psi_r \\rangle} $$\n",
+    "\n",
+    "where $|\\Psi_l\\rangle$ is a trial wave function and $|\\Psi_r\\rangle$ is an initial state. The propagator is sampled using Monte Carlo. $E(\\tau)$ converges to the ground state energy at long $\\tau$, but the energies get noisier at long $\\tau$ due to the sign problem.\n",
+    "\n",
+    "In the following, energy evaluations are performed after a block consisting of `num_steps` steps of duration `dt`. In one iteration, energy samples are collected at `num_blocks` different $\\tau$ values. Multiple walkers are used to batch operations together for computational efficiency. The total number of samples at a given $\\tau$ is given by `num_walkers` $\\times$ `num_iterations_fp`. The energy is then averaged over walkers and iterations.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from ipie.addons.free_projection.qmc.calc import build_fpafqmc_driver\n",
+    "from ipie.config import MPI\n",
+    "from ipie.utils.from_pyscf import gen_ipie_input_from_pyscf_chk\n",
+    "\n",
+    "comm = MPI.COMM_WORLD\n",
+    "\n",
+    "gen_ipie_input_from_pyscf_chk(umf.chkfile, verbose=0)\n",
+    "qmc_options = {\n",
+    "    \"num_iterations_fp\": 100,\n",
+    "    \"num_blocks\": 4,\n",
+    "    \"num_steps\": 30,\n",
+    "    \"num_walkers\": 50,\n",
+    "    \"dt\": 0.05,\n",
+    "}\n",
+    "afqmc = build_fpafqmc_driver(\n",
+    "    comm,\n",
+    "    nelec=mol.nelec,\n",
+    "    seed=212503,\n",
+    "    qmc_options=qmc_options,\n",
+    ")\n",
+    "afqmc.run()\n",
+    "\n",
+    "# analysis\n",
+    "from ipie.addons.free_projection.analysis.extraction import extract_observable\n",
+    "from ipie.addons.free_projection.analysis.jackknife import jackknife_ratios\n",
+    "\n",
+    "for i in range(afqmc.params.num_blocks):\n",
+    "    print(\n",
+    "        f\"\\nEnergy statistics at time {(i+1) * afqmc.params.num_steps_per_block * afqmc.params.timestep}:\"\n",
+    "    )\n",
+    "    qmc_data = extract_observable(afqmc.estimators[i].filename, \"energy\")\n",
+    "    mean_energy, energy_err = jackknife_ratios(qmc_data[\"ENumer\"], qmc_data[\"EDenom\"])\n",
+    "    print(f\"  Energy: {mean_energy:.8e} +- {energy_err:.8e}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Phaseless AFQMC"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from ipie.qmc.calc import build_afqmc_driver\n",
+    "from ipie.config import MPI\n",
+    "from ipie.utils.from_pyscf import gen_ipie_input_from_pyscf_chk\n",
+    "\n",
+    "comm = MPI.COMM_WORLD\n",
+    "\n",
+    "gen_ipie_input_from_pyscf_chk(mf.chkfile, verbose=0)\n",
+    "\n",
+    "# fixing random seed for reproducibility\n",
+    "afqmc = build_afqmc_driver(comm, nelec=mol.nelec, num_walkers_per_task=100, seed=41100801)\n",
+    "if comm.rank == 0:\n",
+    "    print(afqmc.params)  # Inspect the default qmc options\n",
+    "\n",
+    "# Let us override the number of blocks to keep it short\n",
+    "afqmc.params.num_blocks = 400\n",
+    "afqmc.run()\n",
+    "\n",
+    "if comm.rank == 0:\n",
+    "    # We can extract the qmc data as as a pandas data frame like so\n",
+    "    from ipie.analysis.extraction import extract_observable\n",
+    "\n",
+    "    qmc_data = extract_observable(afqmc.estimators.filename, \"energy\")\n",
+    "    y = qmc_data[\"ETotal\"]\n",
+    "    y = y[50:]  # discard first 50 blocks\n",
+    "\n",
+    "    from ipie.analysis.autocorr import reblock_by_autocorr\n",
+    "\n",
+    "    df = reblock_by_autocorr(y, verbose=1)\n",
+    "    print(df.to_csv(index=False))\n",
+    "    # assert np.isclose(df.at[0,'ETotal_ac'], -5.325611614468466)\n",
+    "    # assert np.isclose(df.at[0,'ETotal_error_ac'], 0.00938082351500978)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "py39",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/13-free_projection/run_afqmc.py b/examples/13-free_projection/run_afqmc.py
new file mode 100644
index 00000000..e27e0781
--- /dev/null
+++ b/examples/13-free_projection/run_afqmc.py
@@ -0,0 +1,54 @@
+from pyscf import cc, gto, scf
+
+from ipie.config import MPI
+from ipie.utils.from_pyscf import gen_ipie_input_from_pyscf_chk
+
+comm = MPI.COMM_WORLD
+mol = gto.M(
+    atom=[("H", 1.6 * i, 0, 0) for i in range(0, 10)],
+    basis="sto-6g",
+    verbose=4,
+    unit="Bohr",
+)
+if comm.rank == 0:
+    mf = scf.UHF(mol)
+    mf.chkfile = "scf.chk"
+    mf.kernel()
+    mycc = cc.UCCSD(mf).run()
+    et = mycc.ccsd_t()
+    print("UCCSD(T) energy {}".format(mf.e_tot + mycc.e_corr + et))
+
+    gen_ipie_input_from_pyscf_chk(mf.chkfile, verbose=0)
+comm.barrier()
+
+from ipie.addons.free_projection.qmc.calc import build_fpafqmc_driver
+
+qmc_options = {
+    "num_iterations_fp": 100,
+    "num_blocks": 5,
+    "num_steps": 20,
+    "num_walkers": 10,
+    "dt": 0.05,
+}
+afqmc = build_fpafqmc_driver(
+    comm,
+    nelec=mol.nelec,
+    seed=41100801,
+    qmc_options=qmc_options,
+)
+if comm.rank == 0:
+    print(afqmc.params)  # Inspect the default qmc options
+afqmc.run()
+
+# analysis
+if comm.rank == 0:
+    from ipie.addons.free_projection.analysis.extraction import extract_observable
+    from ipie.addons.free_projection.analysis.jackknife import jackknife_ratios
+
+    for i in range(afqmc.params.num_blocks):
+        print(
+            f"\nEnergy statistics at time {(i+1) * afqmc.params.num_steps_per_block * afqmc.params.timestep}:"
+        )
+        qmc_data = extract_observable(afqmc.estimators[i].filename, "energy")
+        energy_mean, energy_err = jackknife_ratios(qmc_data["ENumer"], qmc_data["EDenom"])
+        print(f"Energy: {energy_mean:.8e} +/- {energy_err:.8e}")
diff --git a/examples/14-fqe-wavefunction/run_afqmc.py b/examples/14-fqe-wavefunction/run_afqmc.py
new file mode 100644
index 00000000..08cd9bb6
--- /dev/null
+++ b/examples/14-fqe-wavefunction/run_afqmc.py
@@ -0,0 +1,218 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Fionn Malone <fmalone@google.com>
+#
+"""Convert an FQE wavefunction to ipie and vice-versa.
+
+Play around with various thresholds to see how it affects the energy.
+"""
+import sys
+from typing import List, Tuple, Union
+
+try:
+    import fqe
+except (ImportError, ModuleNotFoundError):
+    print("fqe required")
+    sys.exit(0)
+try:
+    import pyscf
+except (ImportError, ModuleNotFoundError):
+    print("pyscf required")
+    sys.exit(0)
+import numpy as np
+from pyscf import ao2mo, gto, mcscf, scf
+
+from ipie.hamiltonians.generic import GenericRealChol as GenericHam
+from ipie.systems.generic import Generic as GenericSys
+from ipie.trial_wavefunction.particle_hole import ParticleHole
+from ipie.utils.from_pyscf import generate_hamiltonian
+
+
+def get_occa_occb_coeff_from_fqe_wfn(
+    fqe_wf: fqe.Wavefunction, threshold: float = 0.0
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """Extract CI coefficients and occupation lists from an FQE wavefunction.
+
+    Determinants with ``abs(coeff) <= threshold`` are dropped.
+
+    Returns:
+        ``(coeffs, occa, occb)`` as numpy arrays, one entry per kept determinant.
+    """
+    coeffs: List = []
+    occa_list: List[np.ndarray] = []
+    occb_list: List[np.ndarray] = []
+
+    for sector_key in fqe_wf.sectors():
+        sector = fqe_wf.sector(sector_key)
+        for inda in range(sector._core.lena()):
+            alpha_str = sector._core.string_alpha(inda)  # depends only on inda
+            for indb in range(sector._core.lenb()):
+                coeff = sector.coeff[inda, indb]
+                if np.abs(coeff) > threshold:
+                    beta_str = sector._core.string_beta(indb)
+                    occa_list.append(fqe.bitstring.integer_index(alpha_str))
+                    occb_list.append(fqe.bitstring.integer_index(beta_str))
+                    coeffs.append(coeff)
+
+    return (np.asarray(coeffs), np.asarray(occa_list), np.asarray(occb_list))
+
+
+def get_fqe_wfn_from_occ_coeff(
+    coeffs: np.ndarray,
+    occa: np.ndarray,
+    occb: np.ndarray,
+    n_elec: int,
+    n_orb: int,
+    ms: int = 0,
+    threshold: float = 0.0,
+) -> fqe.Wavefunction:
+    """A helper function to map an AFQMC wavefunction to FQE.
+
+    Args:
+        coeffs: The ci coefficients
+        occa: The alpha occupation strings.
+        occb: The beta occupation strings.
+        n_elec: Number of electrons.
+        n_orb: number of orbitals.
+        ms: spin polarization.
+        threshold: ci coefficient threshold. Normalized coefficients whose
+            absolute value is at or below this value are treated as zero.
+
+    Returns:
+        FQE wavefunction holding the normalized coefficients above ``threshold``.
+    """
+
+    # ensure it is normalized
+    _coeffs = coeffs / np.dot(coeffs.conj(), coeffs) ** 0.5
+    fqe_wf = fqe.Wavefunction([[n_elec, ms, n_orb]])
+
+    for sector_key in fqe_wf.sectors():
+        sector = fqe_wf.sector(sector_key)
+        fqe_graph = sector.get_fcigraph()
+        for idet, (occa_det, occb_det) in enumerate(zip(occa, occb)):
+            inda = fqe_graph.index_alpha(fqe.bitstring.reverse_integer_index(occa_det))
+            # Beta strings must be resolved through the beta index map; using the
+            # alpha map is only correct when both string spaces coincide.
+            indb = fqe_graph.index_beta(fqe.bitstring.reverse_integer_index(occb_det))
+            if np.abs(_coeffs[idet]) > threshold:
+                sector.coeff[inda, indb] = _coeffs[idet]
+
+    return fqe_wf
+
+
+def get_fqe_variational_energy(
+    ecore: float, h1e: np.ndarray, eris: np.ndarray, wfn: fqe.Wavefunction
+) -> float:
+    """Compute FQE variational energy from ERIs and FQE wavefunction."""
+    # get integrals into openfermion order
+    of_eris = np.transpose(eris, (0, 2, 3, 1))
+    # ... and then into FQE format
+    fqe_ham = fqe.restricted_hamiltonian.RestrictedHamiltonian(
+        (h1e, np.einsum("ijlk", -0.5 * of_eris)), e_0=ecore
+    )
+    return wfn.expectationValue(fqe_ham).real
+
+
+def build_ipie_wavefunction_from_pyscf(
+    fcivec: np.ndarray, mc: Union[pyscf.mcscf.CASCI, pyscf.mcscf.CASSCF], tol: float = 1e-12
+) -> ParticleHole:
+    """Build ipie wavefunction in the full space (i.e. with "melting cores")"""
+    coeff, occa, occb = zip(
+        *pyscf.fci.addons.large_ci(fcivec, mc.ncas, mc.nelecas, tol=tol, return_strs=False)
+    )
+    ix = np.argsort(np.abs(coeff))[::-1]
+    nelec = mc._scf.mol.nelec
+    nmo = mc._scf.mo_coeff.shape[-1]
+    return ParticleHole((np.array(coeff)[ix], np.array(occa)[ix], np.array(occb)[ix]), nelec, nmo)
+
+
+def strip_melting_cores(
+    occa: np.ndarray, occb: np.ndarray, n_melting: int
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Map full-space occupations back to the active space (drop melting cores)."""
+
+    def _strip(occ):
+        # Drop the leading n_melting (core) entries, then shift the remaining
+        # orbital indices down by n_melting so they are active-space indices.
+        return np.array([orb - n_melting for orb in occ[n_melting:]])
+
+    # Walk alpha and beta in lockstep; zip stops at the shorter of the two.
+    paired = list(zip(occa, occb))
+    stripped_a = np.array([_strip(det_a) for det_a, _ in paired])
+    return stripped_a, np.array([_strip(det_b) for _, det_b in paired])
+
+
+def build_ipie_sys_ham_from_pyscf(
+    mc: Union[pyscf.mcscf.CASCI, pyscf.mcscf.CASSCF], chol_cut: float = 1e-6
+) -> Tuple[GenericSys, GenericHam]:
+    """Build ipie system and hamiltonian from MCSCF object."""
+    mol = mc._scf.mol  # use the molecule attached to mc, not a module-level global
+    ham = generate_hamiltonian(
+        mol,
+        mc.mo_coeff,
+        mc._scf.get_hcore(),
+        mc.mo_coeff,
+        chol_cut=chol_cut,
+        num_frozen_core=0,
+        verbose=False,
+    )
+    return GenericSys((mol.nelec[0], mol.nelec[1])), ham
+
+
+if __name__ == "__main__":
+    mol = gto.Mole(atom=[("N", (0.0, 0.0, 0.0)), ("N", (0.0, 0.0, 1.45))], spin=0, basis="sto-3g")
+    mol.build()
+    mf = scf.RHF(mol)
+    mf.kernel()
+
+    nalpha, nbeta = mol.nelec
+    nmo = mf.mo_coeff.shape[1]
+
+    ncas, nelecas = (6, 6)
+    mc = mcscf.CASSCF(mf, nelecas, ncas)
+
+    e_tot, e_cas, fcivec, mo, mo_energy = mc.kernel()
+    print(f"DIM(H) = {fcivec.ravel().shape}")
+    # Get the active space ERIs in the CASSCF MO basis
+    h1e, e0 = mc.get_h1eff(mc.mo_coeff)
+    eris = ao2mo.restore("1", mc.get_h2eff(mc.mo_coeff), ncas).reshape((ncas,) * 4)
+    # you can check how truncating the wavefunction affects the energy
+    wfn = build_ipie_wavefunction_from_pyscf(fcivec, mc, tol=0.0)
+    print(f"Length of truncated CI expansion: {wfn.num_dets}")
+    # you check how truncating the Cholesky dimension affects the energy
+    sys, ham = build_ipie_sys_ham_from_pyscf(mc, chol_cut=1e-12)
+
+    ipie_energy = wfn.calculate_energy(sys, ham)[0]
+    msg = f"{ipie_energy.real:.10f}"
+    print(f"ipie energy: {msg}")
+    assert np.isclose(e_tot, ipie_energy, atol=1e-8), f"{e_tot} != {msg}"
+
+    # Convert to FQE and check the energy
+    occa_fqe, occb_fqe = strip_melting_cores(wfn.occa, wfn.occb, wfn.nmelting)
+    fqe_wfn = get_fqe_wfn_from_occ_coeff(
+        wfn.coeffs, occa_fqe, occb_fqe, nelecas, ncas, ms=0, threshold=0.0
+    )
+    fqe_energy = get_fqe_variational_energy(e0, h1e, eris, fqe_wfn)
+    msg = f"{fqe_energy.real:.10f}"
+    print(f"FQE energy: {msg}")
+    assert np.isclose(e_tot, fqe_energy, atol=1e-8), f"{e_tot} != {msg}"
+
+    # round trip back to ipie
+    coeff, occa, occb = get_occa_occb_coeff_from_fqe_wfn(fqe_wfn, threshold=0.0)
+    wfn_round_trip = ParticleHole((coeff, occa, occb), mc._scf.mol.nelec, mc.mo_coeff.shape[-1])
+    ipie_energy = wfn_round_trip.calculate_energy(sys, ham)[0]
+    msg = f"{ipie_energy.real:.10f}"
+    print(f"ipie energy from round trip: {msg}")
+    assert np.isclose(e_tot, ipie_energy, atol=1e-8), f"{e_tot} != {msg}"
diff --git a/examples/15-share_mem_cpu/run_afqmc.py b/examples/15-share_mem_cpu/run_afqmc.py
new file mode 100644
index 00000000..5657a716
--- /dev/null
+++ b/examples/15-share_mem_cpu/run_afqmc.py
@@ -0,0 +1,73 @@
+import sys
+import h5py
+import numpy
+
+from ipie.hamiltonians.generic_chunked import GenericRealCholChunked as HamGeneric
+from ipie.qmc.afqmc import AFQMC
+from ipie.systems.generic import Generic
+from ipie.trial_wavefunction.single_det import SingleDet
+from ipie.utils.mpi import MPIHandler, get_shared_comm
+from ipie.hamiltonians.utils import get_hamiltonian
+
+import os
+from ipie.utils.backend import arraylib as xp
+from pyscf import gto, scf  # scf provides the UHF driver used below
+
+try:
+    from mpi4py import MPI
+except ImportError:
+    sys.exit(0)  # the example needs mpi4py; exit quietly when it is missing
+
+comm = MPI.COMM_WORLD
+
+mol = gto.M(
+    atom=[("H", 1.6 * i, 0, 0) for i in range(0, 4)],
+    basis="sto-6g",
+    verbose=4,
+    unit="Bohr",
+)
+
+mf = scf.UHF(mol)
+mf.chkfile = "scf.chk"
+mf.kernel()
+
+from ipie.utils.from_pyscf import gen_ipie_input_from_pyscf_chk
+gen_ipie_input_from_pyscf_chk(mf.chkfile, verbose=0)  # presumably writes hamiltonian.h5 / wavefunction.h5 to the cwd - confirm
+
+shared_comm = get_shared_comm(comm, verbose=True)
+ham = get_hamiltonian("hamiltonian.h5", shared_comm, verbose=True, pack_chol=True)
+
+with h5py.File("wavefunction.h5", "r") as fa:
+    phi0a = fa["phi0_alpha"][()]
+    psiT = fa["psi_T_alpha"][()]
+
+num_basis = phi0a.shape[0]
+mol_nelec = mol.nelec
+system = Generic(nelec=mol_nelec)
+
+trial = SingleDet(numpy.hstack([psiT, psiT]), mol_nelec, num_basis)
+trial.build()
+trial.half_rotate(ham)
+
+from ipie.walkers.uhf_walkers import UHFWalkers
+num_walkers = 1224 // comm.size  # has to exist before the walkers are constructed
+walkers = UHFWalkers(numpy.hstack([phi0a, phi0a]), system.nup, system.ndown, ham.nbasis, num_walkers, mpi_handler=MPIHandler())
+
+nsteps = 25
+nblocks = 100
+timestep = 0.005
+rng_seed = None
+
+afqmc = AFQMC.build(
+    mol_nelec,
+    ham,
+    trial,
+    walkers,
+    num_walkers,
+    rng_seed,
+    nsteps,
+    nblocks,
+    timestep)
+
+afqmc.run()
+afqmc.finalise(verbose=True)
diff --git a/examples/16-ft_afqmc/run_afqmc.py b/examples/16-ft_afqmc/run_afqmc.py
new file mode 100644
index 00000000..77783a78
--- /dev/null
+++ b/examples/16-ft_afqmc/run_afqmc.py
@@ -0,0 +1,88 @@
+import json
+
+import numpy
+from ueg import UEG
+
+from ipie.addons.thermal.qmc.calc import build_thermal_afqmc_driver
+from ipie.analysis.autocorr import reblock_by_autocorr
+from ipie.analysis.extraction import extract_observable
+from ipie.config import MPI
+
+comm = MPI.COMM_WORLD
+
+verbose = comm.rank == 0  # only rank 0 produces output
+
+# 1. Generate UEG integrals.
+ueg_opts = dict(
+    nup=1,
+    ndown=1,
+    rs=3,
+    ecut=0.5,
+    thermal=True,
+    write_integrals=True,
+)
+
+ueg = UEG(ueg_opts, verbose=verbose)
+
+if comm.rank == 0:
+    ueg.build(verbose=verbose)
+
+comm.barrier()  # all ranks wait until rank 0 has finished building the integrals
+
+# 2. Build thermal AFQMC driver.
+options = {
+    "trial": {
+        "name": "one_body",
+    },
+
+    "walkers": {
+        "lowrank": False,
+    },
+
+    "qmc": {
+        "mu": 0.133579,
+        "beta": 10.0,
+        "timestep": 0.5,
+        "nwalkers": 12 // comm.size,
+        "stack_size": 10,
+        "seed": 7,
+        "nblocks": 20,
+    },
+}
+
+afqmc = build_thermal_afqmc_driver(
+    comm,
+    nelec=ueg.nelec,
+    hamiltonian_file="ueg_integrals.h5",
+    seed=7,
+    options=options,
+    verbosity=verbose,
+)
+
+if verbose:
+    print("\nThermal AFQMC options: \n" + json.dumps(options, indent=4) + "\n")
+    print(afqmc.params)  # Inspect the qmc options.
+
+# 3. Run thermal AFQMC calculation.
+afqmc.run(verbose=verbose)
+afqmc.finalise()
+afqmc.estimators.compute_estimators(hamiltonian=afqmc.hamiltonian, trial=afqmc.trial, walker_batch=afqmc.walkers)
+
+if comm.rank == 0:
+    energy_data = extract_observable(afqmc.estimators.filename, "energy")
+    number_data = extract_observable(afqmc.estimators.filename, "nav")
+
+    print(f"filename: {afqmc.estimators.filename}")
+    print(f"\nenergy_data: \n{energy_data}\n")
+    print(f"number_data: \n{number_data}\n")
+
+    # Reblock the total energy, dropping the first block.
+    energy_series = energy_data["ETotal"][1:]
+    energy_stats = reblock_by_autocorr(energy_series, verbose=verbose)
+    print(energy_stats)
+    print()
+
+    # Reblock the average particle number, dropping the first block.
+    number_series = number_data["Nav"][1:]
+    number_stats = reblock_by_autocorr(number_series, verbose=verbose)
+    print(number_stats)
diff --git a/examples/16-ft_afqmc/ueg.py b/examples/16-ft_afqmc/ueg.py
new file mode 100644
index 00000000..8c08db48
--- /dev/null
+++ b/examples/16-ft_afqmc/ueg.py
@@ -0,0 +1,564 @@
+import numpy
+import scipy.sparse
+from ipie.utils.io import write_qmcpack_sparse
+
+
+class UEG(object):
+    """Uniform electron gas (UEG) system; integrals are generated in a plane-wave basis.
+
+    Parameters
+    ----------
+    nup : int
+        Number of up electrons.
+
+    ndown : int
+        Number of down electrons.
+
+    rs : float
+        Density parameter.
+
+    ecut : float
+        Scaled cutoff energy.
+
+    ktwist : :class:`numpy.ndarray`
+        Twist vector.
+
+    verbose : bool
+        Print extra information.
+
+    Attributes
+    ----------
+    H1 : :class:`numpy.ndarray`
+        One-body Hamiltonian for both spins (set by ``build``). Diagonal in plane wave basis.
+
+    ecore : float
+        Madelung contribution to the total energy.
+
+    h1e_mod : :class:`numpy.ndarray`
+        Modified one-body Hamiltonian.
+
+    nfields : int
+        Number of auxiliary fields (twice the number of momentum transfers).
+
+    basis : :class:`numpy.ndarray`
+        Basis vectors within a cutoff.
+
+    kfac : float
+        Scale factor (2pi/L).
+    """
+
+    def __init__(self, options, verbose=False):
+        """Read the system parameters from the ``options`` mapping and derive box quantities."""
+        if verbose:
+            print("# Parsing input options.")
+        self.name = "UEG"
+        self.nup = options.get("nup")
+        self.ndown = options.get("ndown")
+        self.nelec = (self.nup, self.ndown)
+        self.rs = options.get("rs")
+        self.ecut = options.get("ecut")
+        self.ktwist = numpy.array(options.get("ktwist", [0, 0, 0])).reshape(3)
+
+        self.thermal = options.get("thermal", False)
+        self._alt_convention = options.get("alt_convention", False)
+        self.write_ints = options.get("write_integrals", False)
+        # Hard-coded settings (not read from options).
+        self.sparse = True
+        self.control_variate = False
+        self.diagH1 = True
+
+        # Total # of electrons.
+        self.ne = self.nup + self.ndown
+        # Spin polarisation.
+        self.zeta = (self.nup - self.ndown) / self.ne
+        # Density.
+        self.rho = ((4.0 * numpy.pi) / 3.0 * self.rs**3.0) ** (-1.0)
+        # Box Length.
+        self.L = self.rs * (4.0 * self.ne * numpy.pi / 3.0) ** (1 / 3.0)
+        # Volume
+        self.vol = self.L**3.0
+        # k-space grid spacing.
+        self.kfac = 2 * numpy.pi / self.L
+        # Fermi Wavevector (infinite system).
+        self.kf = (3 * (self.zeta + 1) * numpy.pi**2 * self.ne / self.L**3) ** (1 / 3.0)
+        # Fermi energy (infinite system).
+        self.ef = 0.5 * self.kf**2
+        # Core energy.
+        self.ecore = 0.5 * self.ne * self.madelung()
+
+        if verbose:
+            if self.thermal:
+                print("# Thermal UEG activated.")
+
+            print(f"# Number of spin-up electrons: {self.nup:d}")
+            print(f"# Number of spin-down electrons: {self.ndown:d}")
+            print(f"# rs: {self.rs:6.4e}")
+            print(f"# Spin polarisation (zeta): {self.zeta:6.4e}")
+            print(f"# Electron density (rho): {self.rho:13.8e}")
+            print(f"# Box Length (L): {self.L:13.8e}")
+            print(f"# Volume: {self.vol:13.8e}")
+            print(f"# k-space factor (2pi/L): {self.kfac:13.8e}")
+
+
+    def build(self, verbose=False):
+        """Construct the plane-wave basis, one-body terms and sparse two-body potentials."""
+        # Get plane wave basis vectors and corresponding eigenvalues.
+        self.sp_eigv, self.basis, self.nmax = self.sp_energies(
+                                                self.ktwist, self.kfac, self.ecut)
+        self.shifted_nmax = 2 * self.nmax
+        self.imax_sq = numpy.dot(self.basis[-1], self.basis[-1])
+        self.create_lookup_table()
+        # Sanity check: the lookup must return every basis vector's own position.
+        for i, k in enumerate(self.basis):
+            assert i == self.lookup_basis(k)
+
+        # Number of plane waves.
+        self.nbasis = len(self.sp_eigv)
+        self.nactive = self.nbasis
+        self.ncore = 0
+        self.nfv = 0
+        self.mo_coeff = None
+        # ---------------------------------------------------------------------
+        T = numpy.diag(self.sp_eigv)
+        h1e_mod = self.mod_one_body(T)
+        self.H1 = numpy.array([T, T]) # Making alpha and beta.
+        self.h1e_mod = numpy.array([h1e_mod, h1e_mod])
+
+        # ---------------------------------------------------------------------
+        # Allowed momentum transfers (4*ecut).
+        eigs, qvecs, self.qnmax = self.sp_energies(self.ktwist, self.kfac, 4 * self.ecut)
+
+        # Omit Q = 0 term.
+        self.qvecs = numpy.copy(qvecs[1:])
+        self.vqvec = numpy.array([self.vq(self.kfac * q) for q in self.qvecs])
+
+        # Number of momentum transfer vectors / auxiliary fields.
+        # Can reduce by symmetry but be stupid for the moment.
+        self.nchol = len(self.qvecs)
+        self.nfields = 2 * len(self.qvecs)
+        self.get_momentum_transfers()
+
+        if verbose:
+            print(f"# Number of plane waves: {self.nbasis:d}")
+            print(f"# Number of Cholesky vectors: {self.nchol:d}.")
+            print(f"# Number of auxiliary fields: {self.nfields:d}.")
+            print("# Constructing two-body potentials incore.")
+
+        # ---------------------------------------------------------------------
+        self.chol_vecs, self.iA, self.iB = self.two_body_potentials_incore()
+
+        if self.write_ints:
+            self.write_integrals()
+
+        if verbose:
+            print("# Approximate memory required for "
+                  "two-body potentials: {:13.8e} GB.".format((3 * self.iA.nnz * 16 / (1024**3))))
+            print("# Finished constructing two-body potentials.")
+            print("# Finished building UEG object.")
+
+
+    def sp_energies(self, ks, kfac, ecut):
+        """Enumerate the integer k-vectors inside the cutoff sphere and their kinetic energies.
+
+        Parameters
+        ----------
+        ks : :class:`numpy.ndarray`
+            Twist vector added to each integer k-vector before its energy is evaluated.
+        kfac : float
+            kspace grid spacing.
+        ecut : float
+            energy cutoff in scaled units; a vector n is kept when 0.5 * |n|^2 <= ecut.
+
+        Returns
+        -------
+        spval : :class:`numpy.ndarray`
+            Array containing sorted single particle eigenvalues.
+        kval : :class:`numpy.ndarray`
+            Array containing basis vectors, sorted according to their
+            corresponding single-particle energy.
+        nmax : int
+            Largest integer component scanned along each axis, ceil(sqrt(2 * ecut)).
+        """
+
+        # Scaled Units to match with HANDE.
+        # So ecut is measured in units of 1/kfac^2.
+        nmax = int(numpy.ceil(numpy.sqrt((2 * ecut))))
+
+        spval = []
+        kval = []
+
+        for ni in range(-nmax, nmax + 1):
+            for nj in range(-nmax, nmax + 1):
+                for nk in range(-nmax, nmax + 1):
+                    spe = 0.5 * (ni**2 + nj**2 + nk**2)
+
+                    if spe <= ecut:
+                        kijk = [ni, nj, nk]
+
+                        # Reintroduce 2 \pi / L factor.
+                        ek = 0.5 * numpy.dot(numpy.array(kijk) + ks, numpy.array(kijk) + ks)
+                        kval.append(kijk)
+                        spval.append(kfac**2 * ek)
+
+        # Sort by increasing energy; mergesort is stable, so ties keep enumeration order.
+        spval = numpy.array(spval)
+        ix = numpy.argsort(spval, kind="mergesort")
+        spval = spval[ix]
+        kval = numpy.array(kval)[ix]
+        return spval, kval, nmax
+
+
+    def create_lookup_table(self):
+        """Build ``self.lookup``: flattened basis index -> position in ``self.basis``."""
+        flat_indices = [self.map_basis_to_index(kvec) for kvec in self.basis]
+        largest = max(flat_indices)
+
+        # Slots that no basis vector maps to keep their initial value of 0.
+        table = numpy.zeros(largest + 1, dtype=int)
+        for position, flat in enumerate(flat_indices):
+            table[flat] = position
+        self.lookup = table
+        self.max_ix = largest
+
+
+    def lookup_basis(self, vec):
+        """Return the lookup-table entry for the integer vector ``vec``, or None.
+
+        None is returned when ``vec`` is longer than the last basis vector or when
+        its flattened index lies beyond the end of the lookup table.
+        """
+        if numpy.dot(vec, vec) > self.imax_sq:
+            return None
+
+        flat = self.map_basis_to_index(vec)
+        if flat >= len(self.lookup):
+            return None
+
+        return self.lookup[flat]
+
+
+    def map_basis_to_index(self, k):
+        """Flatten the integer vector ``k`` (components offset by nmax) into a single index."""
+        stride = self.shifted_nmax
+        return (k[0] + self.nmax) + stride * ((k[1] + self.nmax) + stride * (k[2] + self.nmax))
+
+
+    def get_momentum_transfers(self):
+        """Get arrays of plane wave basis vectors connected by momentum transfers Q."""
+        nlimit = self.nup  # non-thermal: only the first nup basis vectors are scanned
+        if self.thermal:
+            nlimit = self.nbasis
+
+        self.ikpq_i = []
+        self.ikpq_kpq = []
+        # k + q: for each q keep (i, lookup of basis[i] + q) whenever the lookup is not None.
+        for iq, q in enumerate(self.qvecs):
+            idxkpq_list_i = []
+            idxkpq_list_kpq = []
+
+            for i, k in enumerate(self.basis[0:nlimit]):
+                kpq = k + q
+                idxkpq = self.lookup_basis(kpq)
+
+                if idxkpq is not None:
+                    idxkpq_list_i += [i]
+                    idxkpq_list_kpq += [idxkpq]
+
+            self.ikpq_i += [idxkpq_list_i]
+            self.ikpq_kpq += [idxkpq_list_kpq]
+
+        # p - q: same construction with the opposite sign.
+        self.ipmq_i = []
+        self.ipmq_pmq = []
+
+        for iq, q in enumerate(self.qvecs):
+            idxpmq_list_i = []
+            idxpmq_list_pmq = []
+
+            for i, p in enumerate(self.basis[0:nlimit]):
+                pmq = p - q
+                idxpmq = self.lookup_basis(pmq)
+
+                if idxpmq is not None:
+                    idxpmq_list_i += [i]
+                    idxpmq_list_pmq += [idxpmq]
+
+            self.ipmq_i += [idxpmq_list_i]
+            self.ipmq_pmq += [idxpmq_list_pmq]
+        # Convert the per-q index lists into int64 arrays.
+        for iq, q in enumerate(self.qvecs):
+            self.ikpq_i[iq] = numpy.array(self.ikpq_i[iq], dtype=numpy.int64)
+            self.ikpq_kpq[iq] = numpy.array(self.ikpq_kpq[iq], dtype=numpy.int64)
+            self.ipmq_i[iq] = numpy.array(self.ipmq_i[iq], dtype=numpy.int64)
+            self.ipmq_pmq[iq] = numpy.array(self.ipmq_pmq[iq], dtype=numpy.int64)
+
+
+    def madelung(self):
+        """Use expression in Schoof et al. (PhysRevLett.115.130402) for the
+        Madelung contribution to the total energy fitted to L.M. Fraser et al.
+        Phys. Rev. B 53, 1814.
+
+        Attributes read (the method takes no arguments)
+        -----------------------------------------------
+        self.rs : float
+            Wigner-Seitz radius.
+
+        self.ne : int
+            Number of electrons.
+
+        Returns
+        -------
+        v_M: float
+            Madelung potential (in Hartrees).
+        """
+        c1 = -2.837297
+        c2 = (3.0 / (4.0 * numpy.pi)) ** (1.0 / 3.0)
+        return c1 * c2 / (self.ne ** (1.0 / 3.0) * self.rs)
+
+    
+    def mod_one_body(self, T):
+        """Absorb the diagonal term of the two-body Hamiltonian to the one-body term.
+        Essentially adding the third term in Eq.(11b) of Phys. Rev. B 75, 245123.
+
+        Parameters
+        ----------
+        T : :class:`numpy.ndarray`
+            one-body Hamiltonian matrix (i.e. kinetic energy); it is copied, not modified
+
+        Returns
+        -------
+        h1e_mod: :class:`numpy.ndarray`
+            modified one-body Hamiltonian (only the diagonal differs from T)
+        """
+        h1e_mod = numpy.copy(T)
+
+        fac = 1.0 / (2.0 * self.vol)
+        for i, ki in enumerate(self.basis):
+            for j, kj in enumerate(self.basis):
+                if i != j:  # skip q = 0, for which vq would divide by zero
+                    q = self.kfac * (ki - kj)
+                    h1e_mod[i, i] = h1e_mod[i, i] - fac * self.vq(q)
+
+        return h1e_mod
+
+
+    def vq(self, q):
+        """The typical 3D Coulomb kernel, 4 pi / |q|^2.
+
+        Parameters
+        ----------
+        q : :class:`numpy.ndarray`
+            a plane-wave vector (q = 0 makes the denominator zero)
+
+        Returns
+        -------
+        v_q: float
+            3D Coulomb kernel (in Hartrees)
+        """
+        return 4 * numpy.pi / numpy.dot(q, q)
+
+
+    def density_operator(self, iq):
+        """Density operator as defined in Eq.(6) of Phys. Rev. B 75, 245123.
+
+        Parameters
+        ----------
+        iq : int
+            index into ``self.qvecs`` selecting the momentum transfer q
+
+        Returns
+        -------
+        rho_q: :class:`scipy.sparse.csc_matrix`
+            (nbasis, nbasis) matrix with ones at (index of k + q, index of k)
+        """
+        # Pair every k with k + q here; the rho_ikpq_* attributes read before were never set.
+        pairs = [(self.lookup_basis(k + self.qvecs[iq]), i) for i, k in enumerate(self.basis)]
+        rows = numpy.array([kpq for kpq, _ in pairs if kpq is not None], dtype=numpy.int64)
+        cols = numpy.array([i for kpq, i in pairs if kpq is not None], dtype=numpy.int64)
+        ones = numpy.ones(rows.shape[0], dtype=numpy.complex128)
+        return scipy.sparse.csc_matrix(
+            (ones, (rows, cols)), shape=(self.nbasis, self.nbasis), dtype=numpy.complex128)
+
+
+    def scaled_density_operator_incore(self, transpose):
+        """Scaled density operators of Eq.(6) of PRB(75)245123 for all q, as one sparse matrix.
+
+        Parameters
+        ----------
+        transpose : bool
+            if True rows are indexed by kpq + i * nbasis, otherwise by kpq * nbasis + i
+
+        Returns
+        -------
+        rho_q: :class:`scipy.sparse.csc_matrix`
+            complex (nbasis * nbasis, nq) matrix; column iq holds sqrt(pi / (vol * |kfac * q|^2))
+        """
+        rho_ikpq_i = []
+        rho_ikpq_kpq = []
+
+        for iq, q in enumerate(self.qvecs):
+            idxkpq_list_i = []
+            idxkpq_list_kpq = []
+
+            for i, k in enumerate(self.basis):
+                kpq = k + q
+                idxkpq = self.lookup_basis(kpq)
+
+                if idxkpq is not None:
+                    idxkpq_list_i += [i]
+                    idxkpq_list_kpq += [idxkpq]
+
+            rho_ikpq_i += [idxkpq_list_i]
+            rho_ikpq_kpq += [idxkpq_list_kpq]
+
+        for iq, q in enumerate(self.qvecs):
+            rho_ikpq_i[iq] = numpy.array(rho_ikpq_i[iq], dtype=numpy.int64)
+            rho_ikpq_kpq[iq] = numpy.array(rho_ikpq_kpq[iq], dtype=numpy.int64)
+
+        nq = len(self.qvecs)
+        nnz = 0  # NOTE(review): nnz is accumulated but never used afterwards
+        for iq in range(nq):
+            nnz += rho_ikpq_kpq[iq].shape[0]
+
+        col_index = []
+        row_index = []
+        values = []
+
+        if transpose:
+            for iq in range(nq):
+                qscaled = self.kfac * self.qvecs[iq]
+                # HS transformation: pi / 2*vol, not 2*pi / vol. NOTE(review): code uses pi / vol - confirm.
+                piovol = numpy.pi / (self.vol)
+                factor = (piovol / numpy.dot(qscaled, qscaled)) ** 0.5
+
+                for innz, kpq in enumerate(rho_ikpq_kpq[iq]):
+                    row_index += [rho_ikpq_kpq[iq][innz] + rho_ikpq_i[iq][innz] * self.nbasis]
+                    col_index += [iq]
+                    values += [factor]
+
+        else:
+            for iq in range(nq):
+                qscaled = self.kfac * self.qvecs[iq]
+                # HS transformation: pi / 2*vol, not 2*pi / vol. NOTE(review): code uses pi / vol - confirm.
+                piovol = numpy.pi / (self.vol)
+                factor = (piovol / numpy.dot(qscaled, qscaled)) ** 0.5
+
+                for innz, kpq in enumerate(rho_ikpq_kpq[iq]):
+                    row_index += [rho_ikpq_kpq[iq][innz] * self.nbasis + rho_ikpq_i[iq][innz]]
+                    col_index += [iq]
+                    values += [factor]
+
+        rho_q = scipy.sparse.csc_matrix(
+            (values, (row_index, col_index)),
+            shape=(self.nbasis * self.nbasis, nq),
+            dtype=numpy.complex128)
+        return rho_q
+
+
+    def two_body_potentials_incore(self):
+        """Calculate A and B of Eq.(13) of PRB(75)245123 for all momentum transfers.
+
+        Returns
+        -------
+        rho_q : sparse matrix
+            scaled density operator built with ``transpose=False``
+        iA : sparse matrix
+            Eq.(13a)
+        iB : sparse matrix
+            Eq.(13b)
+        """
+        forward = self.scaled_density_operator_incore(False)
+        flipped = self.scaled_density_operator_incore(True)
+
+        return (forward, 1j * (forward + flipped), -(forward - flipped))
+
+
+    def hijkl(self, i, j, k, l):
+        """Compute <ij|kl> = (ik|jl) = 1/Omega * 4pi/(kk-ki)**2
+
+        Checks for momentum conservation k_i + k_j = k_k + k_l, or
+        k_k - k_i = k_j - k_l.
+
+        Parameters
+        ----------
+        i, j, k, l : int
+            Orbital indices for integral (ik|jl) = <ij|kl>.
+
+        Returns
+        -------
+        integral : float
+            (ik|jl); 0.0 when the transfer is zero or momentum is not conserved.
+        """
+        transfer = self.basis[k] - self.basis[i]
+        mismatch = transfer - (self.basis[j] - self.basis[l])
+
+        # A zero transfer or violated momentum conservation contributes nothing.
+        if numpy.dot(transfer, transfer) <= 1e-12 or numpy.dot(mismatch, mismatch) >= 1e-12:
+            return 0.0
+
+        return 1.0 / self.vol * self.vq(self.kfac * transfer)
+
+
+    def compute_real_transformation(self):
+        """Build the matrix that mixes each plane wave k with its partner -k.
+
+        The k = 0 vector maps to itself; every (k, -k) pair receives the 2x2 block
+        [[1, 1], [-i, i]] / sqrt(2). The transpose of the assembled matrix is returned.
+        """
+        root2 = numpy.sqrt(2.0)
+        U22 = numpy.array(
+            [[1.0 / root2, 1.0 / root2], [-1.0j / root2, 1.0j / root2]], dtype=numpy.complex128)
+
+        U = numpy.zeros((self.nbasis, self.nbasis), dtype=numpy.complex128)
+
+        for i, b in enumerate(self.basis):
+            if numpy.sum(b * b) == 0:
+                U[i, i] = 1.0
+                continue
+
+            # Locate the unique basis vector equal to -b.
+            partner = numpy.argwhere(numpy.sum((self.basis + b) ** 2, axis=1) == 0)
+            assert partner.ravel().shape[0] == 1
+            partner = partner.ravel()[0]
+
+            # Fill each block once, from the pair member with the lower index.
+            if i < partner:
+                U[i, i] = U22[0, 0]
+                U[i, partner] = U22[0, 1]
+                U[partner, i] = U22[1, 0]
+                U[partner, partner] = U22[1, 1]
+
+        U = U.T.copy()
+        return U
+
+
+    def eri_4(self):
+        """Dense real four-index integrals rebuilt from the sparse ``chol_vecs`` factors."""
+        nb = self.nbasis
+        product = 4 * self.chol_vecs.dot(self.chol_vecs.T)
+        dense = product.toarray().reshape((nb, nb, nb, nb)).real
+        return dense.transpose(0, 1, 3, 2)
+
+
+    def eri_8(self):
+        """Compute 8-fold symmetric integrals. Useful for running standard
+        quantum chemistry methods."""
+        U = self.compute_real_transformation()
+        Uc = U.conj()
+        rotated = numpy.einsum("mp,mnls->pnls", Uc, self.eri_4(), optimize=True)
+        rotated = numpy.einsum("nq,pnls->pqls", U, rotated, optimize=True)
+        rotated = numpy.einsum("lr,pqls->pqrs", Uc, rotated, optimize=True)
+        rotated = numpy.einsum("st,pqrs->pqrt", U, rotated, optimize=True)
+        return rotated.real
+    
+
+    def write_integrals(self, filename="ueg_integrals.h5"):
+        """Write the integrals to ``filename`` via ``write_qmcpack_sparse``."""
+        write_qmcpack_sparse(
+            self.H1[0],
+            2 * self.chol_vecs.toarray(),
+            self.nelec,
+            self.nbasis,
+            enuc=0.0,  # self.ecore is not written (that alternative was commented out)
+            filename=filename)
+
diff --git a/ipie/_version.py b/ipie/_version.py
index 5b671249..532407e0 100644
--- a/ipie/_version.py
+++ b/ipie/_version.py
@@ -13,5 +13,4 @@
 # limitations under the License.
 #
 # Authors: Fionn Malone <fmalone@google.com>
-
-__version__ = "0.7.0"
+__version__ = "0.7.1"
diff --git a/ipie/addons/__init__.py b/ipie/addons/__init__.py
new file mode 100644
index 00000000..871770c1
--- /dev/null
+++ b/ipie/addons/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Directory for additions to ipie which depend on the core ipie library.
+# New features should mirror the ipie layout e.g.
+# ipie/addons/finite_temperature/qmc/afqmc.py etc.
diff --git a/ipie/addons/free_projection/__init__.py b/ipie/addons/free_projection/__init__.py
new file mode 100644
index 00000000..e2aed039
--- /dev/null
+++ b/ipie/addons/free_projection/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ipie/addons/free_projection/analysis/__init__.py b/ipie/addons/free_projection/analysis/__init__.py
new file mode 100644
index 00000000..e2aed039
--- /dev/null
+++ b/ipie/addons/free_projection/analysis/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ipie/addons/free_projection/analysis/extraction.py b/ipie/addons/free_projection/analysis/extraction.py
new file mode 100755
index 00000000..8292de0e
--- /dev/null
+++ b/ipie/addons/free_projection/analysis/extraction.py
@@ -0,0 +1,65 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee <linusjoonho@gmail.com>
+#
+
+
+import h5py
+import numpy
+import pandas as pd
+
+
+def extract_hdf5_data(filename, block_idx=1):
+    """Read the concatenated data and the per-observable layout stored for one block size."""
+    with h5py.File(filename, "r") as fh5:
+        grp = fh5[f"block_size_{block_idx}"]
+        data = numpy.concatenate([grp[f"data/{d}"][:] for d in grp["data"].keys()])
+        shapes = {
+            k: {
+                "names": grp[f"names/{k}"][()],
+                "shape": grp[f"shape/{k}"][:],
+                "offset": grp[f"offset/{k}"][()],
+                "size": grp[f"size/{k}"][()],
+                "scalar": bool(grp[f"scalar/{k}"][()]),
+                "num_walker_props": grp["num_walker_props"][()],
+                "walker_header": grp["walker_prop_header"][()],
+            }
+            for k in grp["shape"].keys()
+        }
+        max_block = sum(grp[f"max_block/{d}"][()] for d in grp["max_block"].keys())
+
+    return data[: max_block + 1], shapes
+
+
+def extract_observable(filename, name="energy", block_idx=1):
+    """Return the samples of observable ``name``: a DataFrame for scalars, else an array."""
+    data, info = extract_hdf5_data(filename, block_idx=block_idx)
+    if name not in info:
+        raise RuntimeError(f"Unknown value for name={name}")
+    meta = info[name]
+    columns = data[:, meta["offset"] : meta["offset"] + meta["size"]]
+
+    if not meta["scalar"]:
+        nsamp = data.shape[0]
+        averaged = columns[:, :-1] / columns[:, -1].reshape((nsamp, -1))  # last column normalises
+        return averaged.reshape((nsamp,) + tuple(meta["shape"]))
+
+    nprops = meta["num_walker_props"]
+    walker_cols = data[:, :nprops].reshape((-1, nprops))
+    obs_cols = columns.reshape((-1,) + tuple(meta["shape"]))
+    frame = pd.DataFrame(numpy.hstack([walker_cols, obs_cols]))
+    frame.columns = [n.decode("utf-8") for n in list(meta["walker_header"]) + meta["names"].split()]
+    return frame
diff --git a/ipie/addons/free_projection/analysis/jackknife.py b/ipie/addons/free_projection/analysis/jackknife.py
new file mode 100755
index 00000000..359e58f9
--- /dev/null
+++ b/ipie/addons/free_projection/analysis/jackknife.py
@@ -0,0 +1,52 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+#!/usr/bin/env python
+
+import numpy
+
+
+def jackknife_ratios(num: numpy.ndarray, denom: numpy.ndarray):
+    r"""Jackknife estimation of standard deviation of the ratio of means.
+
+    Parameters
+    ----------
+    num : :class:`np.ndarray`
+        Numerator samples.
+    denom : :class:`np.ndarray`
+        Denominator samples.
+
+    Returns
+    -------
+    mean : :class:`np.ndarray`
+        Mean of the (real) leave-one-out ratio estimates.
+    sigma : :class:`np.ndarray`
+        Standard deviation of the ratio of means.
+    """
+    n_samples = num.size
+    num_mean = numpy.mean(num)
+    denom_mean = numpy.mean(denom)
+    # Leave-one-out means for all samples at once instead of a Python loop.
+    loo_num = (num_mean * n_samples - num) / (n_samples - 1)
+    loo_denom = (denom_mean * n_samples - denom) / (n_samples - 1)
+    # Keep float/complex input dtypes; integer input used to truncate the estimates.
+    out_dtype = num.dtype if num.dtype.kind in "fc" else numpy.float64
+    jackknife_estimates = (loo_num / loo_denom).real.astype(out_dtype)
+    mean = numpy.mean(jackknife_estimates)
+    sigma = numpy.sqrt((n_samples - 1) * numpy.var(jackknife_estimates))
+    return mean, sigma
diff --git a/ipie/addons/free_projection/analysis/tests/__init__.py b/ipie/addons/free_projection/analysis/tests/__init__.py
new file mode 100644
index 00000000..e2aed039
--- /dev/null
+++ b/ipie/addons/free_projection/analysis/tests/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ipie/addons/free_projection/analysis/tests/test_jackknife.py b/ipie/addons/free_projection/analysis/tests/test_jackknife.py
new file mode 100644
index 00000000..bbb454e7
--- /dev/null
+++ b/ipie/addons/free_projection/analysis/tests/test_jackknife.py
@@ -0,0 +1,36 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+import pytest
+
+from ipie.addons.free_projection.analysis.jackknife import jackknife_ratios
+
+
+@pytest.mark.unit
+def test_jackknife_ratios():
+    numpy.random.seed(0)  # fixed seed keeps the sampled numerators reproducible
+    numerator = numpy.random.randn(100) + 0.0j
+    denominator = numpy.ones(100)  # all ones, so the ratio reduces to the numerator mean
+    estimate, error = jackknife_ratios(numerator, denominator)
+    assert numpy.isclose(estimate, numerator.sum() / denominator.sum())
+    assert numpy.isclose(error, 0.1, atol=0.01)
+
+
+if __name__ == "__main__":
+    test_jackknife_ratios()
diff --git a/ipie/addons/free_projection/estimators/__init__.py b/ipie/addons/free_projection/estimators/__init__.py
new file mode 100644
index 00000000..e2aed039
--- /dev/null
+++ b/ipie/addons/free_projection/estimators/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ipie/addons/free_projection/estimators/energy.py b/ipie/addons/free_projection/estimators/energy.py
new file mode 100644
index 00000000..2425dfa6
--- /dev/null
+++ b/ipie/addons/free_projection/estimators/energy.py
@@ -0,0 +1,42 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Fionn Malone <fmalone@google.com>
+#
+
+
+from ipie.estimators.energy import EnergyEstimator, local_energy
+from ipie.utils.backend import arraylib as xp
+
+
+class EnergyEstimatorFP(EnergyEstimator):
+    """Energy estimator whose sums include each walker's weight, phase and overlap."""
+
+    def __init__(self, system=None, ham=None, trial=None, filename=None):
+        # No extra state: everything is forwarded positionally to EnergyEstimator.
+        super().__init__(system, ham, trial, filename)
+
+    def compute_estimator(self, system, walkers, hamiltonian, trial):
+        """Fill the ENumer/EDenom/E1Body/E2Body entries and return ``self.data``."""
+        trial.calc_greens_function(walkers)
+        # Need to be able to dispatch here
+        local = local_energy(system, hamiltonian, walkers, trial)
+        # Common per-walker prefactor of all four sums.
+        prefactor = walkers.weight * walkers.phase * walkers.ovlp
+        self._data["ENumer"] = xp.sum(prefactor * local[:, 0])
+        self._data["EDenom"] = xp.sum(prefactor)
+        self._data["E1Body"] = xp.sum(prefactor * local[:, 1])
+        self._data["E2Body"] = xp.sum(prefactor * local[:, 2])
+
+        return self.data
diff --git a/ipie/addons/free_projection/estimators/handler.py b/ipie/addons/free_projection/estimators/handler.py
new file mode 100644
index 00000000..be821505
--- /dev/null
+++ b/ipie/addons/free_projection/estimators/handler.py
@@ -0,0 +1,135 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+"""Routines and classes for estimation of observables."""
+
+from __future__ import print_function
+
+from typing import Tuple, Union
+
+import h5py
+import numpy
+
+from ipie.addons.free_projection.estimators.energy import EnergyEstimatorFP
+from ipie.config import MPI
+from ipie.estimators.handler import EstimatorHandler
+from ipie.estimators.utils import H5EstimatorHelper
+
+
+class EstimatorHandlerFP(EstimatorHandler):
+    """Estimator handler that registers ``EnergyEstimatorFP`` as the "energy" estimator."""
+
+    def __init__(
+        self,
+        comm,
+        system,
+        hamiltonian,
+        trial,
+        walker_state=None,
+        verbose: bool = False,
+        filename: Union[str, None] = None,
+        block_size: int = 1,
+        basename: str = "estimates",
+        overwrite=True,
+        observables: Tuple[str, ...] = ("energy",),  # TODO: Use factory method!
+        index: int = 0,
+    ):
+        super().__init__(
+            comm,
+            system,
+            hamiltonian,
+            trial,
+            walker_state,
+            verbose,
+            filename,
+            block_size,
+            basename,
+            overwrite,
+            observables,
+            index,
+        )
+        self["energy"] = EnergyEstimatorFP(system=system, ham=hamiltonian, trial=trial)
+
+    def initialize(self, comm, print_header=True):
+        """Allocate the reduction buffers, reset the HDF5 file on rank 0 and write metadata."""
+        self.local_estimates = numpy.zeros(
+            (self.size + self.num_walker_props), dtype=numpy.complex128
+        )
+        self.global_estimates = numpy.zeros(
+            (self.size + self.num_walker_props), dtype=numpy.complex128
+        )
+        header = f"{'Iter':>17s} {'TimeStep':>10s} "
+        for _, e in self.items():
+            if e.print_to_stdout:
+                header += e.header_to_text
+        if comm.rank == 0:
+            with h5py.File(self.filename, "w"):  # mode "w" creates or truncates the file
+                pass
+            self.dump_metadata()
+        self.output = H5EstimatorHelper(
+            self.filename,
+            base="block_size_1",
+            chunk_size=self.buffer_size,
+            shape=(self.size + self.num_walker_props,),
+        )
+        if comm.rank == 0:
+            with h5py.File(self.filename, "r+") as fh5:
+                fh5["block_size_1/num_walker_props"] = self.num_walker_props
+                fh5["block_size_1/walker_prop_header"] = self.walker_header
+                for k, o in self.items():
+                    fh5[f"block_size_1/shape/{k}"] = o.shape
+                    fh5[f"block_size_1/size/{k}"] = o.size
+                    fh5[f"block_size_1/scalar/{k}"] = int(o.scalar_estimator)
+                    fh5[f"block_size_1/names/{k}"] = " ".join(o.names)
+                    fh5[f"block_size_1/offset/{k}"] = self.num_walker_props + self.get_offset(k)
+        if comm.rank == 0 and print_header:
+            print(header)
+
+    def print_block(self, comm, block, walker_factors, div_factor=None, time_step=0):
+        """Reduce the estimates to rank 0, write/print them and broadcast the energy shift."""
+        self.local_estimates[: walker_factors.size] = walker_factors.buffer
+        comm.Reduce(self.local_estimates, self.global_estimates, op=MPI.SUM)
+        output_string = " "
+        # Get walker data.
+        offset = walker_factors.size
+        for k, e in self.items():
+            if comm.rank == 0:
+                start = offset + self.get_offset(k)
+                end = start + int(self[k].size)
+                est_data = self.global_estimates[start:end]
+                e.post_reduce_hook(est_data)
+                est_string = e.data_to_text(est_data)
+                e.to_ascii_file(est_string)
+                if e.print_to_stdout:
+                    output_string += est_string
+        if comm.rank == 0:
+            shift = self.global_estimates[walker_factors.get_index("HybridEnergy")]
+        else:
+            shift = None
+        # Every rank joins the broadcast; only rank 0 holds the reduced value.
+        walker_factors.eshift = comm.bcast(shift)
+        if comm.rank == 0:
+            self.output.push_to_chunk(self.global_estimates, "data")
+            self.output.increment()
+        if comm.rank == 0:
+            if time_step == 0:
+                print(f"{block:>17d} {time_step:>10d}" + output_string)
+            else:
+                blank = ""
+                print(f"{blank:>17s} {time_step:>10d}" + output_string)
+        self.zero()
diff --git a/ipie/addons/free_projection/estimators/tests/__init__.py b/ipie/addons/free_projection/estimators/tests/__init__.py
new file mode 100644
index 00000000..e2aed039
--- /dev/null
+++ b/ipie/addons/free_projection/estimators/tests/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ipie/addons/free_projection/estimators/tests/test_estimators.py b/ipie/addons/free_projection/estimators/tests/test_estimators.py
new file mode 100644
index 00000000..7bf692ab
--- /dev/null
+++ b/ipie/addons/free_projection/estimators/tests/test_estimators.py
@@ -0,0 +1,81 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Fionn Malone <fmalone@google.com>
+#
+
+import tempfile
+
+import pytest
+
+from ipie.addons.free_projection.estimators.energy import EnergyEstimatorFP
+from ipie.addons.free_projection.estimators.handler import EstimatorHandlerFP
+from ipie.utils.testing import gen_random_test_instances
+
+
+@pytest.mark.unit
+def test_energy_fp_estimator():
+    """Smoke-test EnergyEstimatorFP: five named entries, stdout on, no ASCII file."""
+    nmo, nocc = 10, 8
+    naux, nwalker = 30, 10
+    system, ham, walker_batch, trial = gen_random_test_instances(nmo, nocc, naux, nwalker)
+    estim = EnergyEstimatorFP(system=system, ham=ham, trial=trial)
+    estim.compute_estimator(system, walker_batch, ham, trial)
+    assert len(estim.names) == 5
+    # Hand a copy to the hook rather than the estimator's own data.
+    tmp = estim.data.copy()
+    estim.post_reduce_hook(tmp)
+    assert estim.print_to_stdout
+    assert estim.ascii_filename is None
+    assert estim.shape == (5,)
+    data_to_text = estim.data_to_text(tmp)
+    assert len(data_to_text.split()) == 5
+
+
+@pytest.mark.unit
+def test_estimator_handler_fp():
+    """Run two estimator evaluations through a handler holding two energy estimators."""
+    from ipie.config import MPI
+
+    with tempfile.NamedTemporaryFile() as est_file, tempfile.NamedTemporaryFile() as out_file:
+        nmo = 10
+        nocc = 8
+        naux = 30
+        nwalker = 10
+        system, ham, walker_batch, trial = gen_random_test_instances(nmo, nocc, naux, nwalker)
+        second = EnergyEstimatorFP(system=system, ham=ham, trial=trial, filename=est_file.name)
+        second.print_to_stdout = False
+        comm = MPI.COMM_WORLD
+        handler = EstimatorHandlerFP(
+            comm,
+            system,
+            ham,
+            trial,
+            block_size=10,
+            observables=("energy",),
+            filename=out_file.name,
+        )
+        # Register the extra estimator under its own key.
+        handler["energy1"] = second
+        handler.json_string = ""
+        handler.initialize(comm)
+        shared = dict(system=system, hamiltonian=ham, trial=trial, walker_batch=walker_batch)
+        # Evaluate twice with identical inputs.
+        for _ in range(2):
+            handler.compute_estimators(**shared)
+
+
+if __name__ == "__main__":
+    test_energy_fp_estimator()
+    test_estimator_handler_fp()
diff --git a/ipie/addons/free_projection/propagation/__init__.py b/ipie/addons/free_projection/propagation/__init__.py
new file mode 100644
index 00000000..e2aed039
--- /dev/null
+++ b/ipie/addons/free_projection/propagation/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ipie/addons/free_projection/propagation/free_propagation.py b/ipie/addons/free_projection/propagation/free_propagation.py
new file mode 100644
index 00000000..21d841c6
--- /dev/null
+++ b/ipie/addons/free_projection/propagation/free_propagation.py
@@ -0,0 +1,92 @@
+import time
+
+import numpy
+
+from ipie.propagation.phaseless_base import (
+    construct_mean_field_shift,
+    construct_one_body_propagator,
+)
+from ipie.propagation.phaseless_generic import PhaselessGeneric
+from ipie.utils.backend import arraylib as xp
+from ipie.utils.backend import synchronize
+
+
+class FreePropagation(PhaselessGeneric):
+    """fp-afqmc propagator"""
+
+    def __init__(
+        self, time_step: float, exp_nmax: int = 6, verbose: bool = False, ene_0: float = 0.0
+    ) -> None:
+        super().__init__(time_step, exp_nmax=exp_nmax, verbose=verbose)
+        self.e_shift = ene_0  # unlike the dynamic shift in phaseless, this is a constant shift
+
+    def build(self, hamiltonian, trial=None, walkers=None, mpi_handler=None, verbose=False):
+        """Build ``mf_shift``, ``expH1`` and ``e_shift_1`` from the Hamiltonian and trial."""
+        # Time only the mean field shift construction.
+        start = time.time()
+        self.mf_shift = construct_mean_field_shift(hamiltonian, trial)
+        if verbose:
+            print(f"# Time to mean field shift: {time.time() - start} s")
+            print(
+                "# Absolute value of maximum component of mean field shift: "
+                f"{numpy.max(numpy.abs(self.mf_shift)):13.8e}."
+            )
+        # construct one-body propagator
+        self.expH1 = construct_one_body_propagator(hamiltonian, self.mf_shift, self.dt)
+
+        # Force bias is not allocated here; it will be allocated when it is needed.
+        self.vbias = None
+        self.e_shift_1 = -hamiltonian.ecore - xp.sum(self.mf_shift**2) / 2.0
+
+    def propagate_walkers_two_body(self, walkers, hamiltonian, trial):
+        """Draw normal auxiliary fields, call ``apply_VHS`` and return ``(cmf, ceshift)``."""
+        # Normally distributed auxiliary fields.
+        xi = (
+            xp.random.normal(0.0, 1.0, hamiltonian.nfields * walkers.nwalkers).reshape(
+                walkers.nwalkers, hamiltonian.nfields
+            )
+            + 0.0j
+        )
+
+        # Constant factor arising from mean field shift
+        cmf = xp.exp(-self.sqrt_dt * xp.einsum("wx,x->w", xi, self.mf_shift))
+        # Constant factor arising from shifting the probability distribution.
+        ceshift = xp.exp(self.dt * (self.e_shift + self.e_shift_1))
+        xi = xi.T.copy()
+        self.apply_VHS(walkers, hamiltonian, xi)
+        return cmf, ceshift
+
+    def propagate_walkers(self, walkers, hamiltonian, trial, eshift):
+        """Advance the walkers by one step (one-body, two-body, one-body) and update weights."""
+        synchronize()
+        start_time = time.time()
+        ovlp = trial.calc_greens_function(walkers)
+        synchronize()
+        self.timer.tgf += time.time() - start_time
+        # 2. Update Slater matrix
+        # 2.a Apply one-body
+        self.propagate_walkers_one_body(walkers)
+
+        # 2.b Apply two-body
+        cmf, ceshift = self.propagate_walkers_two_body(walkers, hamiltonian, trial)
+
+        # 2.c Apply one-body
+        self.propagate_walkers_one_body(walkers)
+
+        start_time = time.time()
+        ovlp_new = trial.calc_overlap(walkers)
+        synchronize()
+        self.timer.tovlp += time.time() - start_time
+
+        start_time = time.time()
+        self.update_weight(walkers, ovlp, ovlp_new, ceshift, cmf, eshift)
+        synchronize()
+        self.timer.tupdate += time.time() - start_time
+
+    def update_weight(self, walkers, ovlp, ovlp_new, cfb, cmf, eshift):
+        """Scale weights by ``|cfb * cmf|``, rotate phases by its angle, store ``ovlp_new``."""
+        # weights in fp keep track of the walker normalization
+        factor = cfb * cmf
+        walkers.weight *= xp.abs(factor)
+        walkers.phase *= xp.exp(1j * xp.angle(factor))
+        walkers.ovlp = ovlp_new
diff --git a/ipie/addons/free_projection/propagation/tests/__init__.py b/ipie/addons/free_projection/propagation/tests/__init__.py
new file mode 100644
index 00000000..e2aed039
--- /dev/null
+++ b/ipie/addons/free_projection/propagation/tests/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ipie/addons/free_projection/propagation/tests/test_generic.py b/ipie/addons/free_projection/propagation/tests/test_generic.py
new file mode 100644
index 00000000..0a3744a2
--- /dev/null
+++ b/ipie/addons/free_projection/propagation/tests/test_generic.py
@@ -0,0 +1,70 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Joonho Lee
+#          Fionn Malone <fionn.malone@gmail.com>
+#
+
+import numpy
+import pytest
+
+from ipie.addons.free_projection.propagation.free_propagation import FreePropagation
+from ipie.utils.misc import dotdict
+from ipie.utils.testing import build_test_case_handlers
+
+
+@pytest.mark.unit
+def test_free_projection():
+    """Propagate one free-projection step and compare walker sums with stored references."""
+    # Seed numpy's global RNG before any test data is built.
+    numpy.random.seed(7)
+    nmo = 10
+    nelec = (6, 5)
+    nwalkers = 8
+    nsteps = 25
+    qmc = dotdict(
+        {
+            "dt": 0.005,
+            "nstblz": 5,
+            "nwalkers": nwalkers,
+            "batched": False,
+            "hybrid": True,
+            "num_steps": nsteps,
+        }
+    )
+    # Set after construction; the literal above starts with False.
+    qmc.batched = True
+    case = build_test_case_handlers(nelec, nmo, num_dets=1, options=qmc, seed=7)
+    propagator = FreePropagation(time_step=0.005, verbose=False, ene_0=-1.0)
+    propagator.build(case.hamiltonian, case.trial)
+    # One propagation step with eshift = -1.0.
+    propagator.propagate_walkers(case.walkers, case.hamiltonian, case.trial, -1.0)
+
+    walkers = case.walkers
+    assert walkers.phia.shape == (nwalkers, nmo, nelec[0])
+    assert walkers.phib.shape == (nwalkers, nmo, nelec[1])
+    # Hard-coded reference sums for this seeded setup.
+    expected_sums = {
+        "phase": 7.926221838159645 + 0.3971467053264697j,
+        "weight": 1.7901505653712695,
+        "ovlp": -6.40187371404052e-05 - 2.34160780650416e-05j,
+        "phia": 33.95629475599705 - 0.30274130601759786j,
+        "phib": 41.45587700725909 - 2.8023497141639413j,
+    }
+    for attr, reference in expected_sums.items():
+        assert numpy.allclose(numpy.sum(getattr(walkers, attr)), reference)
+
+
+if __name__ == "__main__":
+    test_free_projection()
diff --git a/ipie/addons/free_projection/qmc/__init__.py b/ipie/addons/free_projection/qmc/__init__.py
new file mode 100644
index 00000000..e2aed039
--- /dev/null
+++ b/ipie/addons/free_projection/qmc/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ipie/addons/free_projection/qmc/calc.py b/ipie/addons/free_projection/qmc/calc.py
new file mode 100644
index 00000000..d22ccb45
--- /dev/null
+++ b/ipie/addons/free_projection/qmc/calc.py
@@ -0,0 +1,159 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+"""Helper Routines for setting up a calculation"""
+# todo : handle more gracefully.
+from ipie.addons.free_projection.estimators.energy import local_energy
+from ipie.addons.free_projection.propagation.free_propagation import FreePropagation
+from ipie.addons.free_projection.qmc.fp_afqmc import FPAFQMC
+from ipie.addons.free_projection.qmc.options import QMCParamsFP
+from ipie.config import MPI
+from ipie.hamiltonians.utils import get_hamiltonian
+from ipie.systems.utils import get_system
+from ipie.trial_wavefunction.utils import get_trial_wavefunction
+from ipie.utils.io import get_input_value
+from ipie.utils.mpi import MPIHandler
+from ipie.walkers.walkers_dispatch import get_initial_walker, UHFWalkersTrial
+
+
+def build_fpafqmc_driver(
+    comm,
+    nelec: tuple,
+    wavefunction_file: str = "wavefunction.h5",
+    hamiltonian_file: str = "hamiltonian.h5",
+    estimator_filename: str = "estimates.0.h5",
+    seed: int = None,
+    qmc_options: dict = None,
+):
+    """Pack the arguments into the nested options dict and delegate to ``get_driver_fp``."""
+    extra_qmc = qmc_options if qmc_options is not None else {}
+    settings = {
+        "system": {"nup": nelec[0], "ndown": nelec[1]},
+        "qmc": {"rng_seed": seed, **extra_qmc},
+        "hamiltonian": {"integrals": hamiltonian_file},
+        "trial": {"filename": wavefunction_file},
+        "estimators": {"overwrite": True, "filename": estimator_filename},
+    }
+    return get_driver_fp(settings, comm)
+
+
+def get_driver_fp(options: dict, comm: MPI.COMM_WORLD) -> FPAFQMC:  # options -> FPAFQMC driver
+    verbosity = options.get("verbosity", 1)
+    qmc_opts = get_input_value(options, "qmc", default={}, alias=["qmc_options"])
+    sys_opts = get_input_value(
+        options, "system", default={}, alias=["model"], verbose=verbosity > 1
+    )
+    ham_opts = get_input_value(options, "hamiltonian", default={}, verbose=verbosity > 1)
+    # backward compatibility (to be removed): copy the system options into ham_opts
+    for item in sys_opts.items():
+        if item[0].lower() == "name" and "name" in ham_opts.keys():
+            continue  # an existing hamiltonian "name" is not overwritten
+        ham_opts[item[0]] = item[1]
+
+    twf_opt = get_input_value(
+        options, "trial", default={}, alias=["trial_wavefunction"], verbose=verbosity > 1
+    )
+
+    wlk_opts = get_input_value(
+        options, "walkers", default={}, alias=["walker", "walker_opts"], verbose=verbosity > 1
+    )
+    wlk_opts["pop_control"] = wlk_opts.get("pop_control", "pair_branch")
+    wlk_opts["population_control"] = wlk_opts["pop_control"]  # same value under both keys
+
+    beta = get_input_value(qmc_opts, "beta", default=None)
+    if comm.rank != 0:
+        verbosity = 0  # silence every rank except 0
+    batched = get_input_value(qmc_opts, "batched", default=True, verbose=verbosity)
+
+    if beta is not None or batched == False:  # NOTE(review): `== False` lets None through; confirm
+        raise ValueError("Trying to use legacy features which aren't supported.")
+    else:
+        from ipie.qmc.options import QMCOpts
+
+        qmc = QMCOpts(qmc_opts, verbose=0)
+        mpi_handler = MPIHandler(nmembers=qmc_opts.get("nmembers", 1), verbose=verbosity)
+        system = get_system(
+            sys_opts, verbose=verbosity, comm=comm
+        )  # Have to deal with shared comm in the future. I think we will remove this...
+        ham_file = get_input_value(ham_opts, "integrals", None, verbose=verbosity)
+        if ham_file is None:
+            raise ValueError("Hamiltonian filename not specified.")
+        pack_chol = get_input_value(
+            ham_opts, "symmetry", True, alias=["pack_chol", "pack_cholesky"], verbose=verbosity
+        )
+        hamiltonian = get_hamiltonian(
+            ham_file, mpi_handler.scomm, pack_chol=pack_chol, verbose=verbosity
+        )
+        wfn_file = get_input_value(twf_opt, "filename", default="", alias=["wfn_file"])
+        num_elec = (system.nup, system.ndown)
+        trial = get_trial_wavefunction(
+            num_elec,
+            hamiltonian.nbasis,
+            wfn_file,
+            verbose=verbosity,
+            ndets=get_input_value(twf_opt, "ndets", default=1, alias=["num_dets"]),
+            ndets_props=get_input_value(
+                twf_opt, "ndets_props", default=1, alias=["num_dets_props"]
+            ),
+            ndet_chunks=get_input_value(
+                twf_opt, "ndet_chunks", default=1, alias=["num_det_chunks"]
+            ),
+        )
+        trial.half_rotate(hamiltonian, mpi_handler.scomm)
+        if trial.compute_trial_energy:
+            trial.calculate_energy(system, hamiltonian)
+            trial.e1b = comm.bcast(trial.e1b, root=0)  # every rank receives rank 0's value
+            trial.e2b = comm.bcast(trial.e2b, root=0)
+        comm.barrier()
+        _, initial_walker = get_initial_walker(trial)  # first returned item is discarded
+        walkers = UHFWalkersTrial(
+            trial,
+            initial_walker,
+            system.nup,
+            system.ndown,
+            hamiltonian.nbasis,
+            qmc.nwalkers,
+            mpi_handler,
+        )
+        walkers.build(trial)  # any intermediates that require information from trial
+        params = QMCParamsFP(
+            num_walkers=qmc.nwalkers,
+            total_num_walkers=qmc.nwalkers * comm.size,  # num_walkers times the number of ranks
+            num_blocks=qmc.nblocks,
+            num_steps_per_block=qmc.nsteps,
+            timestep=qmc.dt,
+            num_stblz=qmc.nstblz,
+            pop_control_freq=qmc.npop_control,
+            rng_seed=qmc.rng_seed,
+            num_iterations_fp=get_input_value(qmc_opts, "num_iterations_fp", 1),
+        )
+        ene_0 = local_energy(system, hamiltonian, walkers, trial)[0][0]
+        propagator = FreePropagation(time_step=params.timestep, exp_nmax=10, ene_0=ene_0)
+        propagator.build(hamiltonian, trial, walkers, mpi_handler)
+        afqmc = FPAFQMC(
+            system,
+            hamiltonian,
+            trial,
+            walkers,
+            propagator,
+            mpi_handler,
+            params,
+            verbose=(verbosity and comm.rank == 0),
+        )
+
+    return afqmc
diff --git a/ipie/addons/free_projection/qmc/fp_afqmc.py b/ipie/addons/free_projection/qmc/fp_afqmc.py
new file mode 100644
index 00000000..f0d971ca
--- /dev/null
+++ b/ipie/addons/free_projection/qmc/fp_afqmc.py
@@ -0,0 +1,383 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee <linusjoonho@gmail.com>
+#
+
+"""Driver to perform AFQMC calculation"""
+import json
+import time
+from typing import Dict, Optional, Tuple
+
+from ipie.addons.free_projection.estimators.handler import EstimatorHandlerFP
+from ipie.addons.free_projection.propagation.free_propagation import FreePropagation
+from ipie.addons.free_projection.qmc.options import QMCParamsFP
+from ipie.addons.free_projection.walkers.uhf_walkers import UHFWalkersFP
+from ipie.estimators.estimator_base import EstimatorBase
+from ipie.hamiltonians.utils import get_hamiltonian
+from ipie.qmc.afqmc import AFQMC
+from ipie.trial_wavefunction.utils import get_trial_wavefunction
+from ipie.utils.backend import synchronize
+from ipie.utils.io import to_json
+from ipie.utils.mpi import MPIHandler
+from ipie.walkers.base_walkers import WalkerAccumulator
+from ipie.walkers.walkers_dispatch import get_initial_walker
+
+
+class FPAFQMC(AFQMC):
+    """Free projection AFQMC driver."""
+
+    def __init__(
+        self,
+        system,
+        hamiltonian,
+        trial,
+        walkers,
+        propagator,
+        mpi_handler,
+        params: QMCParamsFP,
+        verbose: int = 0,
+    ):
+        super().__init__(  # forwards every argument unchanged to the AFQMC constructor
+            system, hamiltonian, trial, walkers, propagator, mpi_handler, params, verbose=verbose
+        )
+
+    @staticmethod
+    # TODO: wavefunction type, trial type, hamiltonian type
+    def build(
+        num_elec: Tuple[int, int],
+        hamiltonian,
+        trial_wavefunction,
+        walkers=None,
+        num_walkers: int = 100,
+        seed: Optional[int] = None,
+        num_steps_per_block: int = 25,
+        num_blocks: int = 100,
+        timestep: float = 0.005,
+        stabilize_freq=5,
+        pop_control_freq=-1,
+        verbose=True,
+        mpi_handler=None,
+        ene_0=0.0,
+        num_iterations_fp=1,
+    ) -> "FPAFQMC":
+        """Factory method to build FPAFQMC driver from hamiltonian and trial wavefunction.
+
+        Parameters
+        ----------
+        num_elec: tuple(int, int)
+            Number of alpha and beta electrons.
+        hamiltonian :
+            Hamiltonian describing the system.
+        trial_wavefunction :
+            Trial wavefunction
+        walkers :
+            Walkers to use. If None, UHFWalkersFP walkers are built from the trial.
+        num_walkers : int
+            Number of walkers per MPI process used in the simulation. The TOTAL
+                number of walkers is num_walkers * number of processes.
+        num_steps_per_block : int
+            Number of Monte Carlo steps before estimators are evaluated.
+                Default 25.
+        num_blocks : int
+            Number of blocks to perform. Total number of steps = num_blocks *
+                num_steps_per_block.
+        timestep : float
+            Imaginary timestep. Default 0.005.
+        stabilize_freq : int
+            Frequency at which to perform QR factorization of walkers (in units
+                of steps.) Default 5.
+        pop_control_freq : int
+            Not performed in free projection.
+        verbose : bool
+            Log verbosity. Default True i.e. print information to stdout.
+        ene_0 : float
+            Energy guess for the desired state.
+        num_iterations_fp : int
+            Number of iterations of free projection.
+        """
+
+        driver = AFQMC.build(
+            num_elec,
+            hamiltonian,
+            trial_wavefunction,
+            walkers,
+            num_walkers,
+            seed,
+            num_steps_per_block,
+            num_blocks,
+            timestep,
+            stabilize_freq,
+            pop_control_freq,
+            verbose,
+            mpi_handler,
+        )
+        if mpi_handler is None:
+            mpi_handler = MPIHandler()
+        comm = mpi_handler.comm
+        fp_prop = FreePropagation(timestep, verbose=verbose, exp_nmax=10, ene_0=ene_0)
+        fp_prop.build(hamiltonian, driver.trial, walkers, mpi_handler)
+        if walkers is None:
+            _, initial_walker = get_initial_walker(driver.trial)
+            # TODO this is a factory method not a class
+            walkers = UHFWalkersFP(
+                initial_walker,
+                driver.system.nup,
+                driver.system.ndown,
+                hamiltonian.nbasis,
+                num_walkers,
+                mpi_handler,
+            )
+            walkers.build(driver.trial)  # any intermediates that require information from trial
+        params = QMCParamsFP(
+            num_walkers=num_walkers,
+            total_num_walkers=num_walkers * comm.size,
+            num_blocks=num_blocks,
+            num_steps_per_block=num_steps_per_block,
+            timestep=timestep,
+            num_stblz=stabilize_freq,
+            pop_control_freq=pop_control_freq,
+            rng_seed=seed,
+            num_iterations_fp=num_iterations_fp,
+        )
+        return FPAFQMC(
+            driver.system,
+            driver.hamiltonian,
+            driver.trial,
+            walkers,  # caller-supplied walkers, or the UHFWalkersFP built above (was discarded)
+            fp_prop,
+            mpi_handler,
+            params,
+            verbose=(verbose and comm.rank == 0),
+        )
+
+    @staticmethod
+    # TODO: wavefunction type, trial type, hamiltonian type
+    def build_from_hdf5(
+        num_elec: Tuple[int, int],
+        ham_file,
+        wfn_file,
+        num_walkers: int = 100,
+        seed: int = None,
+        num_steps_per_block: int = 25,
+        num_blocks: int = 100,
+        timestep: float = 0.005,
+        stabilize_freq=5,
+        pop_control_freq=5,
+        num_dets_chunk=1,
+        num_dets_for_trial_props=100,
+        pack_cholesky=True,
+        verbose=True,
+    ) -> "FPAFQMC":
+        """Factory method to build FPAFQMC driver from hamiltonian and trial wavefunction files.
+
+        Parameters
+        ----------
+        num_elec: tuple(int, int)
+            Number of alpha and beta electrons.
+        ham_file : str
+            Path to Hamiltonian describing the system.
+        wfn_file : str
+            Path to Trial wavefunction
+        num_walkers : int
+            Number of walkers per MPI process used in the simulation. The TOTAL
+                number of walkers is num_walkers * number of processes.
+        num_steps_per_block : int
+            Number of Monte Carlo steps before estimators are evaluated.
+                Default 25.
+        num_blocks : int
+            Number of blocks to perform. Total number of steps = num_blocks *
+                num_steps_per_block.
+        timestep : float
+            Imaginary timestep. Default 0.005.
+        stabilize_freq : int
+            Frequency at which to perform QR factorization of walkers (in units
+                of steps.) Default 5.
+        pop_control_freq : int
+            Population control frequency (in units of steps) forwarded to
+                FPAFQMC.build. Default 5.
+        num_dets_chunk : int
+            Size of chunks of determinants to process during batching. Default=1 (no batching).
+        num_dets_for_trial_props: int
+            Number of determinants to use to evaluate trial wavefunction properties. Default 100.
+        pack_cholesky : bool
+            Use symmetry to reduce memory consumption of integrals. Default True.
+        verbose : bool
+            Log verbosity. Default True i.e. print information to stdout.
+        """
+        mpi_handler = MPIHandler()
+        _verbose = verbose and mpi_handler.comm.rank == 0
+        ham = get_hamiltonian(
+            ham_file, mpi_handler.scomm, verbose=_verbose, pack_chol=pack_cholesky
+        )
+        trial = get_trial_wavefunction(
+            num_elec,
+            ham.nbasis,
+            wfn_file,
+            ndet_chunks=num_dets_chunk,
+            ndets_props=num_dets_for_trial_props,
+            verbose=_verbose,
+        )
+        trial.half_rotate(ham, mpi_handler.scomm)
+        return FPAFQMC.build(
+            trial.nelec,
+            ham,
+            trial,
+            num_walkers=num_walkers,
+            seed=seed,
+            num_steps_per_block=num_steps_per_block,
+            num_blocks=num_blocks,
+            timestep=timestep,
+            stabilize_freq=stabilize_freq,
+            pop_control_freq=pop_control_freq,
+            verbose=verbose,
+            mpi_handler=mpi_handler,
+        )
+
+    def setup_estimators(
+        self, filename, additional_estimators: Optional[Dict[str, EstimatorBase]] = None
+    ):
+        self.accumulators = WalkerAccumulator(
+            ["Weight", "WeightFactor", "HybridEnergy"], self.params.num_steps_per_block
+        )
+        comm = self.mpi_handler.comm
+        self.estimators = []
+        for i in range(self.params.num_blocks):  # one handler per block, sharing the accumulator
+            self.estimators.append(
+                EstimatorHandlerFP(
+                    self.mpi_handler.comm,
+                    self.system,
+                    self.hamiltonian,
+                    self.trial,
+                    walker_state=self.accumulators,
+                    verbose=(comm.rank == 0 and self.verbose),
+                    filename=f"{filename}.{i}",  # block index is appended to the base filename
+                    observables=("energy",),
+                )
+            )
+        if additional_estimators is not None:  # checked only after the handlers were created
+            raise NotImplementedError(
+                "Additional estimators not implemented yet for free projection."
+            )
+        # TODO: Move this to estimator and log uuid etc in serialization
+        json.encoder.FLOAT_REPR = lambda o: format(o, ".6f")
+        json_string = to_json(self)
+        for e in self.estimators:  # every handler gets the same serialized driver description
+            e.json_string = json_string
+
+        for i, e in enumerate(self.estimators):
+            e.initialize(comm, i == 0)  # second argument is True only for the first handler
+
+    def run(
+        self,
+        walkers=None,
+        estimator_filename="estimate.h5",
+        verbose=True,
+        additional_estimators: Optional[Dict[str, EstimatorBase]] = None,
+    ):
+        """Perform FP AFQMC simulation on state object by Gaussian sampling of short time projection.
+
+        Parameters
+        ----------
+        walkers : walkers object, optional
+            Initial walkers. Default None. NOTE: rebuilt from the trial at every iteration.
+        estimator_filename : str
+            Base filename for the estimator handlers; handler i receives ``{name}.{i}``.
+        verbose : bool
+            Currently unused by this method.
+        additional_estimators : dict
+            Must be None; anything else raises NotImplementedError in setup_estimators.
+        """
+        self.setup_timers()
+        tzero_setup = time.time()
+        if walkers is not None:
+            self.walkers = walkers
+        eshift = 0.0  # constant shift handed to the propagator; never updated in this loop
+        self.walkers.orthogonalise()
+
+        self.get_env_info()
+        self.copy_to_gpu()
+        self.distribute_hamiltonian()
+        self.setup_estimators(estimator_filename, additional_estimators=additional_estimators)
+
+        total_steps = self.params.num_steps_per_block * self.params.num_blocks
+
+        synchronize()
+        comm = self.mpi_handler.comm
+        self.tsetup += time.time() - tzero_setup
+
+        for iteration in range(self.params.num_iterations_fp):
+            block_number = 0  # every iteration restarts from fresh walkers and block 0
+            _, initial_walker = get_initial_walker(self.trial)
+            # TODO this is a factory method not a class
+            initial_walkers = UHFWalkersFP(
+                initial_walker,
+                self.system.nup,
+                self.system.ndown,
+                self.hamiltonian.nbasis,
+                self.params.num_walkers,
+                self.mpi_handler,
+            )
+            initial_walkers.build(self.trial)
+            self.walkers = initial_walkers
+            for step in range(1, total_steps + 1):
+                synchronize()
+                start_step = time.time()
+                if step % self.params.num_stblz == 0:
+                    start = time.time()
+                    self.walkers.orthogonalise()
+                    synchronize()
+                    self.tortho += time.time() - start
+                start = time.time()
+
+                self.propagator.propagate_walkers(
+                    self.walkers, self.hamiltonian, self.trial, eshift
+                )
+                self.tprop_ovlp = self.propagator.timer.tovlp
+                self.tprop_update = self.propagator.timer.tupdate
+                self.tprop_gf = self.propagator.timer.tgf
+                self.tprop_vhs = self.propagator.timer.tvhs
+                self.tprop_gemm = self.propagator.timer.tgemm
+
+                # accumulate weight, hybrid energy etc. across block
+                start = time.time()
+                # self.accumulators.update(self.walkers)
+                synchronize()
+                self.testim += time.time() - start  # we dump this time into estimator
+                # calculate estimators
+                start = time.time()
+                if step % self.params.num_steps_per_block == 0:
+                    self.estimators[block_number].compute_estimators(
+                        system=self.system,
+                        hamiltonian=self.hamiltonian,
+                        trial=self.trial,
+                        walker_batch=self.walkers,
+                    )
+                    self.estimators[block_number].print_block(
+                        comm,
+                        iteration,
+                        self.accumulators,
+                        time_step=block_number,
+                    )
+                    block_number += 1
+                synchronize()
+                self.testim += time.time() - start
+
+                # restart write features disabled
+                # if self.walkers.write_restart and step % self.walkers.write_freq == 0:
+                #     self.walkers.write_walkers_batch(comm)
+                # self.accumulators.zero()
+                synchronize()
+                self.tstep += time.time() - start_step
diff --git a/ipie/addons/free_projection/qmc/options.py b/ipie/addons/free_projection/qmc/options.py
new file mode 100644
index 00000000..89c657e3
--- /dev/null
+++ b/ipie/addons/free_projection/qmc/options.py
@@ -0,0 +1,26 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+from dataclasses import dataclass
+
+from ipie.qmc.options import QMCParams
+
+
+@dataclass
+class QMCParamsFP(QMCParams):
+    num_iterations_fp: int = 1  # number of free-projection iterations run by the FP driver
diff --git a/ipie/addons/free_projection/qmc/tests/__init__.py b/ipie/addons/free_projection/qmc/tests/__init__.py
new file mode 100644
index 00000000..e2aed039
--- /dev/null
+++ b/ipie/addons/free_projection/qmc/tests/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ipie/addons/free_projection/qmc/tests/test_afqmc_single_det_batch.py b/ipie/addons/free_projection/qmc/tests/test_afqmc_single_det_batch.py
new file mode 100644
index 00000000..22d011be
--- /dev/null
+++ b/ipie/addons/free_projection/qmc/tests/test_afqmc_single_det_batch.py
@@ -0,0 +1,98 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import tempfile
+
+import numpy
+import pytest
+
+from ipie.addons.free_projection.analysis.extraction import extract_observable
+from ipie.addons.free_projection.utils.testing import build_driver_test_instance_fp
+from ipie.config import MPI
+
+steps = 25
+blocks = 10
+seed = 7
+nwalkers = 10
+nmo = 14
+nelec = (4, 3)
+# steps = 1
+# blocks = 10
+# seed = 7
+# nwalkers = 1
+# nmo = 4
+# nelec = (2, 1)
+
+pop_control_freq = 5
+stabilise_freq = 5
+comm = MPI.COMM_WORLD
+
+options = {
+    "dt": 0.005,
+    "nstblz": 5,
+    "nwalkers": nwalkers,
+    "nwalkers_per_task": nwalkers,
+    "batched": True,
+    "hybrid": True,
+    "steps": steps,
+    "blocks": blocks,
+    "pop_control_freq": pop_control_freq,
+    "stabilise_freq": stabilise_freq,
+    "rng_seed": seed,
+}
+
+
+@pytest.mark.driver
+def test_generic_single_det_batch_fp():
+    with tempfile.NamedTemporaryFile() as tmpf:
+        driver_options = {
+            "verbosity": 0,
+            "get_sha1": False,
+            "qmc": options,
+            "estimates": {"filename": tmpf.name, "observables": {"energy": {}}},
+            "walkers": {"population_control": "pair_branch"},
+        }
+
+        afqmc = build_driver_test_instance_fp(
+            nelec,
+            nmo,
+            trial_type="single_det",
+            options=driver_options,
+            seed=7,
+        )
+        afqmc.setup_estimators(tmpf.name)  # NOTE(review): run() calls setup_estimators again
+        afqmc.run(verbose=False, estimator_filename=tmpf.name)
+        afqmc.finalise(verbose=0)
+        for i in range(len(afqmc.estimators)):  # one "<name>.<i>" file per estimator handler
+            data_batch = extract_observable(f"{tmpf.name}.{i}", "energy")
+            numer_batch = data_batch["ENumer"]
+            denom_batch = data_batch["EDenom"]
+            etot_batch = data_batch["ETotal"]
+            assert etot_batch.dtype == numpy.complex128  # only the dtype is checked per file
+
+        data_batch = extract_observable(f"{tmpf.name}.0", "energy")  # first handler's file
+        numer_batch = data_batch["ENumer"]
+        denom_batch = data_batch["EDenom"]
+        etot_batch = data_batch["ETotal"]
+        assert numpy.allclose(numpy.sum(numer_batch), 89026.91053310843 + 37.16899096646583j)
+        assert numpy.allclose(numpy.sum(denom_batch), 7431.790242711337 + 12.22172751384279j)
+        assert numpy.allclose(numpy.sum(etot_batch), 35.93783190822862 - 0.04412020753601597j)
+
+
+if __name__ == "__main__":
+    test_generic_single_det_batch_fp()
diff --git a/ipie/addons/free_projection/utils/__init__.py b/ipie/addons/free_projection/utils/__init__.py
new file mode 100644
index 00000000..e2aed039
--- /dev/null
+++ b/ipie/addons/free_projection/utils/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ipie/addons/free_projection/utils/testing.py b/ipie/addons/free_projection/utils/testing.py
new file mode 100644
index 00000000..62114621
--- /dev/null
+++ b/ipie/addons/free_projection/utils/testing.py
@@ -0,0 +1,155 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fionn.malone@gmail.com>
+#          Joonho Lee
+#
+
+from typing import Tuple, Union
+
+import numpy
+
+from ipie.addons.free_projection.propagation.free_propagation import FreePropagation
+from ipie.addons.free_projection.qmc.fp_afqmc import FPAFQMC
+from ipie.addons.free_projection.walkers.uhf_walkers import UHFWalkersFP
+from ipie.hamiltonians import Generic as HamGeneric
+from ipie.qmc.options import QMCOpts
+from ipie.systems import Generic
+from ipie.utils.io import get_input_value
+from ipie.utils.mpi import MPIHandler
+from ipie.utils.testing import build_random_trial, generate_hamiltonian, TestData
+
+
+def build_test_case_handlers_fp(
+    num_elec: Tuple[int, int],
+    num_basis: int,
+    num_dets=1,
+    trial_type="single_det",
+    wfn_type="opt",
+    complex_integrals: bool = False,
+    complex_trial: bool = False,
+    seed: Union[int, None] = None,
+    rhf_trial: bool = False,
+    two_body_only: bool = False,
+    choltol: float = 1e-3,
+    reortho: bool = True,
+    options: Union[dict, None] = None,
+):
+    """Build a random test system with FP walkers/propagator and propagate it num_steps times."""
+    # `options` must provide "dt" and "num_steps"; item access works for dict and dotdict.
+    if seed is not None:
+        numpy.random.seed(seed)
+    sym = 4 if complex_integrals else 8
+    h1e, chol, _, eri = generate_hamiltonian(
+        num_basis, num_elec, cplx=complex_integrals, sym=sym, tol=choltol
+    )
+    system = Generic(nelec=num_elec)
+    ham = HamGeneric(
+        h1e=numpy.array([h1e, h1e]),
+        chol=chol.reshape((-1, num_basis**2)).T.copy(),
+        ecore=0,
+    )
+    ham.eri = eri.copy()
+    trial, init = build_random_trial(
+        num_elec,
+        num_basis,
+        num_dets=num_dets,
+        wfn_type=wfn_type,
+        trial_type=trial_type,
+        complex_trial=complex_trial,
+        rhf_trial=rhf_trial,
+    )
+    trial.half_rotate(ham)
+    trial.calculate_energy(system, ham)
+    # necessary for backwards compatabilty with tests
+    if seed is not None:
+        numpy.random.seed(seed)
+
+    nwalkers = get_input_value(options, "nwalkers", default=10, alias=["num_walkers"])
+    walkers = UHFWalkersFP(init, system.nup, system.ndown, ham.nbasis, nwalkers, MPIHandler())
+    walkers.build(trial)  # any intermediates that require information from trial
+
+    prop = FreePropagation(time_step=options["dt"])
+    prop.build(ham, trial)
+
+    trial.calc_greens_function(walkers)
+    for _ in range(options["num_steps"]):  # was attribute access, which fails for a plain dict
+        if two_body_only:
+            prop.propagate_walkers_two_body(walkers, ham, trial)
+        else:
+            prop.propagate_walkers(walkers, ham, trial, trial.energy)
+        if reortho:
+            walkers.reortho()
+        trial.calc_greens_function(walkers)
+
+    return TestData(trial, walkers, ham, prop)
+
+
+def build_driver_test_instance_fp(
+    num_elec: Tuple[int, int],
+    num_basis: int,
+    num_dets=1,
+    trial_type="phmsd",
+    wfn_type="opt",
+    complex_integrals: bool = False,
+    complex_trial: bool = False,
+    rhf_trial: bool = False,
+    seed: Union[int, None] = None,
+    density_diff=False,
+    options: Union[dict, None] = None,
+):
+    """Build an FPAFQMC driver on a random Hamiltonian and random trial for driver tests."""
+    if seed is not None:
+        numpy.random.seed(seed)
+    h1e, chol, _, _ = generate_hamiltonian(num_basis, num_elec, cplx=complex_integrals)
+    system = Generic(nelec=num_elec)
+    ham = HamGeneric(
+        h1e=numpy.array([h1e, h1e]),
+        chol=chol.reshape((-1, num_basis**2)).T.copy(),
+        ecore=0,
+    )
+    if density_diff:
+        ham.density_diff = True
+    trial, _ = build_random_trial(
+        num_elec,
+        num_basis,
+        num_dets=num_dets,
+        wfn_type=wfn_type,
+        trial_type=trial_type,
+        complex_trial=complex_trial,
+        rhf_trial=rhf_trial,
+    )
+    trial.half_rotate(ham)
+    try:
+        trial.calculate_energy(system, ham)
+    except NotImplementedError:
+        pass  # NOTE(review): trial.energy is still read below; confirm it exists in this case
+
+    qmc_opts = get_input_value(options, "qmc", default={}, alias=["qmc_options"])
+    qmc = QMCOpts(qmc_opts, verbose=0)
+    afqmc = FPAFQMC.build(
+        num_elec,
+        ham,
+        trial,
+        num_walkers=qmc.nwalkers,
+        seed=qmc.rng_seed,
+        num_steps_per_block=5,  # hard-coded, not read from options
+        num_blocks=2,  # hard-coded, not read from options
+        timestep=qmc.dt,
+        stabilize_freq=qmc.nstblz,
+        pop_control_freq=qmc.npop_control,
+        ene_0=trial.energy,
+        num_iterations_fp=3,  # hard-coded, not read from options
+    )
+    return afqmc
diff --git a/ipie/addons/free_projection/walkers/__init__.py b/ipie/addons/free_projection/walkers/__init__.py
new file mode 100644
index 00000000..e2aed039
--- /dev/null
+++ b/ipie/addons/free_projection/walkers/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ipie/addons/free_projection/walkers/tests/__init__.py b/ipie/addons/free_projection/walkers/tests/__init__.py
new file mode 100644
index 00000000..e2aed039
--- /dev/null
+++ b/ipie/addons/free_projection/walkers/tests/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ipie/addons/free_projection/walkers/tests/test_single_det_uhf.py b/ipie/addons/free_projection/walkers/tests/test_single_det_uhf.py
new file mode 100644
index 00000000..385ed4b9
--- /dev/null
+++ b/ipie/addons/free_projection/walkers/tests/test_single_det_uhf.py
@@ -0,0 +1,51 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Joonho Lee
+#          Fionn Malone <fionn.malone@gmail.com>
+#
+
+import pytest
+
+from ipie.addons.free_projection.utils.testing import build_test_case_handlers_fp
+from ipie.utils.misc import dotdict
+
+
+@pytest.mark.unit
+def test_reortho_batch_fp():
+    nelec = (5, 5)
+    nwalkers = 10
+    nsteps = 10
+    nmo = 10
+    qmc = dotdict(
+        {
+            "dt": 0.005,
+            "nstblz": 5,
+            "nwalkers": nwalkers,
+            "batched": False,  # overwritten with True right after the dict is built
+            "hybrid": True,
+            "num_steps": nsteps,  # number of propagation steps run by the builder
+        }
+    )
+    qmc.batched = True
+    batched_data = build_test_case_handlers_fp(
+        nelec, nmo, num_dets=1, complex_trial=True, options=qmc, seed=7
+    )
+    batched_data.walkers.orthogonalise()
+    assert batched_data.walkers.phia.shape == (nwalkers, nmo, nelec[0])  # shape check only
+    assert batched_data.walkers.phib.shape == (nwalkers, nmo, nelec[1])
+
+
+if __name__ == "__main__":
+    test_reortho_batch_fp()
diff --git a/ipie/addons/free_projection/walkers/uhf_walkers.py b/ipie/addons/free_projection/walkers/uhf_walkers.py
new file mode 100644
index 00000000..ee418898
--- /dev/null
+++ b/ipie/addons/free_projection/walkers/uhf_walkers.py
@@ -0,0 +1,82 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+from ipie.config import config
+from ipie.utils.backend import arraylib as xp
+from ipie.utils.backend import qr, qr_mode, synchronize
+from ipie.walkers.uhf_walkers import UHFWalkers
+
+
+class UHFWalkersFP(UHFWalkers):
+    """UHF walkers for free projection: QR determinants are folded into weight and phase."""
+
+    def orthogonalise(self, free_projection=False):
+        """Reorthogonalise every walker and absorb det(R) into its weight and phase.
+
+        Returns whatever :meth:`reortho` returns (the per-walker determinants).
+
+        Parameters
+        ----------
+        free_projection : bool
+            Ignored by this implementation.
+        """
+        detR = self.reortho()
+        # |det R| rescales the weight; arg(det R) rotates the phase.
+        self.weight *= xp.abs(self.detR)
+        self.phase *= xp.exp(1j * xp.angle(self.detR))
+        return detR
+
+    def reortho_batched(self):
+        """QR-factorise all walkers in one batched call (requires the GPU option).
+
+        Stores the product of the diagonals of the R factors in ``self.detR``,
+        divides ``self.ovlp`` by it and returns it.
+        """
+        assert config.get_option("use_gpu")
+        self.phia, r_up = qr(self.phia, mode=qr_mode)
+        det = xp.prod(xp.einsum("wii->wi", r_up), axis=1)
+        if self.ndown > 0:
+            self.phib, r_dn = qr(self.phib, mode=qr_mode)
+            det *= xp.prod(xp.einsum("wii->wi", r_dn), axis=1)
+        self.detR = det
+        self.ovlp = self.ovlp / self.detR
+        synchronize()
+        return self.detR
+
+    def reortho(self):
+        """Reorthogonalise walkers via QR, one walker at a time unless the GPU option is set.
+
+        For each walker the product of the diagonal of R (alpha times beta) is
+        stored in ``self.detR``, its log is added to ``self.log_detR`` and
+        ``self.ovlp`` is divided by it.
+        """
+        if config.get_option("use_gpu"):
+            return self.reortho_batched()
+
+        n_dn = self.ndown
+        for iw in range(self.nwalkers):
+            self.phia[iw], r_up = qr(self.phia[iw], mode=qr_mode)
+            det_i = xp.prod(xp.diag(r_up))
+            if n_dn > 0:
+                self.phib[iw], r_dn = qr(self.phib[iw], mode=qr_mode)
+                det_i *= xp.prod(xp.diag(r_dn))
+            self.log_detR[iw] += xp.log(det_i)
+            self.detR[iw] = det_i
+            self.ovlp[iw] = self.ovlp[iw] / det_i
+        synchronize()
+        return self.detR
diff --git a/ipie/addons/thermal/__init__.py b/ipie/addons/thermal/__init__.py
new file mode 100644
index 00000000..f91ef518
--- /dev/null
+++ b/ipie/addons/thermal/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
diff --git a/ipie/addons/thermal/analysis/__init__.py b/ipie/addons/thermal/analysis/__init__.py
new file mode 100644
index 00000000..f91ef518
--- /dev/null
+++ b/ipie/addons/thermal/analysis/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
diff --git a/ipie/addons/thermal/analysis/extraction.py b/ipie/addons/thermal/analysis/extraction.py
new file mode 100644
index 00000000..5f725f21
--- /dev/null
+++ b/ipie/addons/thermal/analysis/extraction.py
@@ -0,0 +1,64 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+
+from ipie.utils.misc import get_from_dict
+
+
+def set_info(frame, md):
+    """Add run metadata from `md` to `frame` as columns; return the new column names."""
+    ncols = len(frame.columns)
+    system = md.get("system")
+    hamiltonian = md.get("hamiltonian")
+    trial = md.get("trial")
+    qmc = md.get("params")
+    fp = get_from_dict(md, ["propagators", "free_projection"])
+    bp = get_from_dict(md, ["estimates", "estimates", "back_prop"])
+    # `system`, `hamiltonian` and `params` must be present: they are read unconditionally.
+    br = qmc.get("beta_scaled")
+    ints = system.get("integral_file")
+    chol = system.get("threshold")
+
+    frame["nup"] = system.get("nup")
+    frame["ndown"] = system.get("ndown")
+    frame["mu"] = qmc.get("mu")
+    frame["beta"] = qmc.get("beta")
+    frame["dt"] = qmc.get("timestep")
+    frame["ntot_walkers"] = qmc.get("total_num_walkers", 0)
+    frame["nbasis"] = hamiltonian.get("nbasis", 0)
+
+    if trial is not None:
+        frame["mu_T"] = trial.get("mu")
+        frame["Nav_T"] = trial.get("nav")
+
+    if fp is not None:
+        frame["free_projection"] = fp
+
+    if bp is not None:
+        frame["tau_bp"] = bp["tau_bp"]
+
+    if br is not None:
+        frame["beta_red"] = br
+
+    if ints is not None:
+        frame["integrals"] = ints
+
+    if chol is not None:
+        frame["cholesky_treshold"] = chol
+
+    return list(frame.columns[ncols:])
diff --git a/ipie/addons/thermal/analysis/thermal_analysis.py b/ipie/addons/thermal/analysis/thermal_analysis.py
new file mode 100644
index 00000000..7fefbeba
--- /dev/null
+++ b/ipie/addons/thermal/analysis/thermal_analysis.py
@@ -0,0 +1,124 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+#!/usr/bin/env python
+
+import argparse
+import glob
+import sys
+
+import numpy
+import pandas as pd
+import scipy.optimize
+
+from ipie.addons.thermal.analysis.extraction import set_info
+from ipie.analysis.extraction import extract_observable, get_metadata
+
+
+def parse_args(args):
+    """Parse command-line arguments.
+
+    Parameters
+    ----------
+    args : list of strings
+        command-line arguments.
+
+    Returns
+    -------
+    options : :class:`argparse.Namespace`
+        Parsed arguments; `options.filenames` lists the files to analyse.
+    """
+
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "-f", nargs="+", dest="filenames", help="Space-separated list of files to analyse."
+    )
+
+    options = parser.parse_args(args)
+    # Nothing to analyse without file names: show the usage text and exit with status 1.
+    if not options.filenames:
+        parser.print_help()
+        sys.exit(1)
+
+    return options
+
+
+def analyse(files, block_idx=1):
+    """Average energy and particle-number estimates, grouped by run metadata.
+
+    `files` are estimator output files and `block_idx` is forwarded to
+    `extract_observable`. Returns one row per metadata group holding the mean
+    and standard error of ETotal, E1Body, E2Body and Nav.
+    """
+    sims = []
+    for f in sorted(files):
+        data_energy = extract_observable(f, name="energy", block_idx=block_idx)
+        data_nav = extract_observable(f, name="nav", block_idx=block_idx)
+        data = pd.concat([data_energy, data_nav["Nav"]], axis=1)
+        keys = set_info(data, get_metadata(f))
+        sims.append(data[1:])  # drop the first row of every file
+    if not sims:
+        raise ValueError("No files to analyse.")
+    full = pd.concat(sims).groupby(keys, sort=False)
+
+    analysed = []
+    for i, g in full:
+        averaged = pd.DataFrame(index=[0])
+        for c in ["ETotal", "E1Body", "E2Body", "Nav"]:
+            vals = numpy.real(g[c].values)
+            averaged[c] = [vals.mean()]
+            # Standard error of the mean (ddof=1), computed with numpy because
+            # this module never imports scipy.stats.
+            averaged[c + "_error"] = [numpy.std(vals, ddof=1) / numpy.sqrt(len(vals))]
+        for k, v in zip(full.keys, i):
+            averaged[k] = v
+        analysed.append(averaged)
+    return pd.concat(analysed).reset_index(drop=True).sort_values(by=keys)
+
+
+def nav_mu(mu, coeffs):  # evaluate the polynomial with coefficients `coeffs` at `mu`
+    return numpy.polyval(coeffs, mu)
+
+
+def main(args):
+    """Analyse estimator output files and print the averaged results as a table.
+
+    Parameters
+    ----------
+    args : list of strings
+        command-line arguments.
+
+    Returns
+    -------
+    None.
+    """
+    # If the first file argument contains "*" it is glob-expanded and the others are ignored.
+    options = parse_args(args)
+    if "*" in options.filenames[0]:
+        files = glob.glob(options.filenames[0])
+
+    else:
+        files = options.filenames
+
+    data = analyse(files)
+
+    print(data.to_string(index=False))
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/ipie/addons/thermal/estimators/__init__.py b/ipie/addons/thermal/estimators/__init__.py
new file mode 100644
index 00000000..f91ef518
--- /dev/null
+++ b/ipie/addons/thermal/estimators/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
diff --git a/ipie/addons/thermal/estimators/energy.py b/ipie/addons/thermal/estimators/energy.py
new file mode 100644
index 00000000..5cf34b03
--- /dev/null
+++ b/ipie/addons/thermal/estimators/energy.py
@@ -0,0 +1,60 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+from typing import Union
+
+from ipie.addons.thermal.estimators.generic import local_energy_generic_cholesky
+from ipie.addons.thermal.estimators.thermal import one_rdm_from_G
+from ipie.addons.thermal.walkers.uhf_walkers import UHFThermalWalkers
+from ipie.estimators.energy import EnergyEstimator
+from ipie.hamiltonians.generic import GenericComplexChol, GenericRealChol
+from ipie.utils.backend import arraylib as xp
+
+
+def local_energy(
+    hamiltonian: Union[GenericRealChol, GenericComplexChol], walkers: UHFThermalWalkers
+):
+    """Return a (nwalkers, 3) complex array with one energy triple per walker."""
+    energies = xp.zeros((walkers.nwalkers, 3), dtype=xp.complex128)
+    for iw in range(walkers.nwalkers):
+        # Want the full Green's function when calculating observables.
+        walkers.calc_greens_function(iw, slice_ix=walkers.stack[iw].nslice)
+        P = one_rdm_from_G(xp.array([walkers.Ga[iw], walkers.Gb[iw]]))
+        energy = local_energy_generic_cholesky(hamiltonian, P)
+        energies[iw] = energy  # row layout follows local_energy_generic_cholesky's tuple
+
+    return energies
+
+
+class ThermalEnergyEstimator(EnergyEstimator):  # energy estimator for thermal walkers
+    def __init__(self, system=None, hamiltonian=None, trial=None, filename=None):
+        super().__init__(system=system, ham=hamiltonian, trial=trial, filename=filename)
+
+    def compute_estimator(self, system=None, walkers=None, hamiltonian=None, trial=None):
+        """Store the weighted energy sums for `walkers` and return the estimator data."""
+        # `local_energy` recomputes each walker's Green's function; `system`/`trial` are unused.
+        if hamiltonian is None:
+            raise ValueError("Hamiltonian must not be none.")
+        if walkers is None:
+            raise ValueError("walkers must not be none.")
+        energy = local_energy(hamiltonian, walkers)
+        self._data["ENumer"] = xp.sum(walkers.weight * energy[:, 0].real)
+        self._data["EDenom"] = xp.sum(walkers.weight)
+        self._data["E1Body"] = xp.sum(walkers.weight * energy[:, 1].real)
+        self._data["E2Body"] = xp.sum(walkers.weight * energy[:, 2].real)
+        return self.data
diff --git a/ipie/addons/thermal/estimators/generic.py b/ipie/addons/thermal/estimators/generic.py
new file mode 100644
index 00000000..cddc7a15
--- /dev/null
+++ b/ipie/addons/thermal/estimators/generic.py
@@ -0,0 +1,138 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import plum
+import numpy
+from ipie.hamiltonians.generic import GenericRealChol, GenericComplexChol
+
+
+@plum.dispatch
+def local_energy_generic_cholesky(hamiltonian: GenericRealChol, P):
+    r"""Calculate local energy for generic two-body hamiltonian.
+
+    This uses the cholesky decomposed two-electron integrals.
+
+    Parameters
+    ----------
+    hamiltonian : :class:`GenericRealChol`
+        ab-initio hamiltonian information
+    P : :class:`numpy.ndarray`
+        Walker's density matrix; P[0] and P[1] are the alpha and beta blocks.
+
+    Returns
+    -------
+    (E, T, V): tuple
+        Total energy, one-body energy (including ecore) and two-body energy.
+    """
+    # Element wise multiplication.
+    e1b = numpy.sum(hamiltonian.H1[0] * P[0]) + numpy.sum(hamiltonian.H1[1] * P[1])
+    nbasis = hamiltonian.nbasis
+    nchol = hamiltonian.nchol
+    Pa, Pb = P[0], P[1]
+
+    # Ecoul: real and imaginary parts of P are contracted with chol separately.
+    Xa = hamiltonian.chol.T.dot(Pa.real.ravel()) + 1.0j * hamiltonian.chol.T.dot(Pa.imag.ravel())
+    Xb = hamiltonian.chol.T.dot(Pb.real.ravel()) + 1.0j * hamiltonian.chol.T.dot(Pb.imag.ravel())
+    X = Xa + Xb
+    ecoul = 0.5 * numpy.dot(X, X)
+
+    # Ex: accumulate trace(T T) with T = P^T L_x for both spins.
+    PaT = Pa.T.copy()
+    PbT = Pb.T.copy()
+    T = numpy.zeros((nbasis, nbasis), dtype=numpy.complex128)
+    exx = 0.0j  # we will iterate over cholesky index to update Ex energy for alpha and beta
+
+    for x in range(nchol):  # Write a numba function that calls BLAS for this.
+        Lmn = hamiltonian.chol[:, x].reshape((nbasis, nbasis))
+        T[:, :].real = PaT.real.dot(Lmn)
+        T[:, :].imag = PaT.imag.dot(Lmn)
+        exx += numpy.trace(T.dot(T))
+        T[:, :].real = PbT.real.dot(Lmn)
+        T[:, :].imag = PbT.imag.dot(Lmn)
+        exx += numpy.trace(T.dot(T))
+
+    exx *= 0.5
+    e2b = ecoul - exx
+    return (e1b + e2b + hamiltonian.ecore, e1b + hamiltonian.ecore, e2b)
+
+
+@plum.dispatch
+def local_energy_generic_cholesky(hamiltonian: GenericComplexChol, P):
+    r"""Calculate local energy for generic two-body hamiltonian.
+
+    This uses the `A` and `B` matrices stored on the hamiltonian.
+
+    Parameters
+    ----------
+    hamiltonian : :class:`GenericComplexChol`
+        ab-initio hamiltonian information
+    P : :class:`numpy.ndarray`
+        Walker's density matrix; P[0] and P[1] are the alpha and beta blocks.
+
+    Returns
+    -------
+    (E, T, V): tuple
+        Total energy, one-body energy (including ecore) and two-body energy.
+    """
+    # Element wise multiplication.
+    e1b = numpy.sum(hamiltonian.H1[0] * P[0]) + numpy.sum(hamiltonian.H1[1] * P[1])
+    nbasis = hamiltonian.nbasis
+    nchol = hamiltonian.nchol
+    Pa, Pb = P[0], P[1]
+
+    # Ecoul: contract the spin-summed density with A and with B.
+    XAa = hamiltonian.A.T.dot(Pa.ravel())
+    XAb = hamiltonian.A.T.dot(Pb.ravel())
+    XA = XAa + XAb
+
+    XBa = hamiltonian.B.T.dot(Pa.ravel())
+    XBb = hamiltonian.B.T.dot(Pb.ravel())
+    XB = XBa + XBb
+
+    ecoul = 0.5 * (numpy.dot(XA, XA) + numpy.dot(XB, XB))
+
+    # Ex: accumulate trace(TA TA) + trace(TB TB) for both spins.
+    PaT = Pa.T.copy()
+    PbT = Pb.T.copy()
+    TA = numpy.zeros((nbasis, nbasis), dtype=numpy.complex128)
+    TB = numpy.zeros((nbasis, nbasis), dtype=numpy.complex128)
+    exx = 0.0j  # we will iterate over cholesky index to update Ex energy for alpha and beta
+
+    for x in range(nchol):  # write a cython function that calls blas for this.
+        Amn = hamiltonian.A[:, x].reshape((nbasis, nbasis))
+        Bmn = hamiltonian.B[:, x].reshape((nbasis, nbasis))
+        TA[:, :] = PaT.dot(Amn)
+        TB[:, :] = PaT.dot(Bmn)
+        exx += numpy.trace(TA.dot(TA)) + numpy.trace(TB.dot(TB))
+
+        TA[:, :] = PbT.dot(Amn)
+        TB[:, :] = PbT.dot(Bmn)
+        exx += numpy.trace(TA.dot(TA)) + numpy.trace(TB.dot(TB))
+
+    exx *= 0.5
+    e2b = ecoul - exx
+    return (e1b + e2b + hamiltonian.ecore, e1b + hamiltonian.ecore, e2b)
+
+
+def fock_generic(hamiltonian, P):  # returns h1e_mod minus a term built from chol and P
+    nbasis = hamiltonian.nbasis
+    nchol = hamiltonian.nchol
+    hs_pot = hamiltonian.chol.T.reshape(nchol, nbasis, nbasis)  # one matrix per vector
+    mf_shift = 1j * numpy.einsum("lpq,spq->l", hs_pot, P)  # sums over P's leading axis too
+    VMF = 1j * numpy.einsum("lpq,l->pq", hs_pot, mf_shift)
+    return hamiltonian.h1e_mod - VMF
diff --git a/ipie/addons/thermal/estimators/greens_function.py b/ipie/addons/thermal/estimators/greens_function.py
new file mode 100644
index 00000000..562aa170
--- /dev/null
+++ b/ipie/addons/thermal/estimators/greens_function.py
@@ -0,0 +1,141 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+import scipy.linalg
+
+
+def greens_function(A):
+    r"""Construct Greens function from density matrix.
+
+    .. math::
+        G_{ij} = \langle c_{i} c_j^{\dagger} \rangle \\
+               = \left[\frac{1}{1+A}\right]_{ij}
+
+    Uses stable algorithm from White et al. (1988)
+
+    Parameters
+    ----------
+    A : :class:`numpy.ndarray`
+        Density matrix (product of B matrices).
+
+    Returns
+    -------
+    G : :class:`numpy.ndarray`
+        Thermal Green's function.
+    """
+    # With A = U1 S1 V1 (scipy returns V^H as V1): 1 + A = U1 (U1^H V1^H + S1) V1.
+    (U1, S1, V1) = scipy.linalg.svd(A)
+    T = numpy.dot(U1.conj().T, V1.conj().T) + numpy.diag(S1)
+    (U2, S2, V2) = scipy.linalg.svd(T)
+    U3 = numpy.dot(U1, U2)
+    D3 = numpy.diag(1.0 / S2)
+    V3 = numpy.dot(V2, V1)
+    # Hence 1 + A = U3 S2 V3 and G = V3^H S2^{-1} U3^H.
+    return (V3.conj().T).dot(D3).dot(U3.conj().T)
+
+
+def greens_function_qr_strat(walkers, iw, slice_ix=None, inplace=True):
+    """Compute the Green's function for walker with index `iw` at time
+    `slice_ix`. Uses the Stratification method (DOI 10.1109/IPDPS.2012.37)
+
+    Parameters
+    ----------
+    iw : int
+        Index of the walker whose propagator stack is used.
+    slice_ix : int, optional
+        Time slice; defaults to the walker stack's current `time_slice`.
+    inplace : bool
+        If True store the result in `walkers.Ga[iw]` / `walkers.Gb[iw]` and
+        return (None, None); otherwise return (Ga_iw, Gb_iw) without storing.
+    """
+    stack_iw = walkers.stack[iw]
+
+    if slice_ix is None:
+        slice_ix = stack_iw.time_slice
+
+    bin_ix = slice_ix // stack_iw.stack_size
+    # For final time slice want first block to be the rightmost (for energy
+    # evaluation).
+    if bin_ix == stack_iw.nstack:
+        bin_ix = -1
+
+    Ga_iw, Gb_iw = None, None
+
+    for spin in [0, 1]:
+        # Need to construct the product A(l) = B_l B_{l-1}..B_L...B_{l+1} in
+        # stable way. Iteratively construct column pivoted QR decompositions
+        # (A = QDT) starting from the rightmost (product of) propagator(s).
+        B = stack_iw.get((bin_ix + 1) % stack_iw.nstack)
+
+        (Q1, R1, P1) = scipy.linalg.qr(B[spin], pivoting=True, check_finite=False)
+        # Form D matrices
+        D1 = numpy.diag(R1.diagonal())
+        D1inv = numpy.diag(1.0 / R1.diagonal())
+        T1 = numpy.einsum("ii,ij->ij", D1inv, R1)
+        # permute them
+        T1[:, P1] = T1[:, range(walkers.nbasis)]
+
+        for i in range(2, stack_iw.nstack + 1):
+            ix = (bin_ix + i) % stack_iw.nstack
+            B = stack_iw.get(ix)
+            C2 = numpy.dot(numpy.dot(B[spin], Q1), D1)
+            (Q1, R1, P1) = scipy.linalg.qr(C2, pivoting=True, check_finite=False)
+            # Compute D matrices
+            D1inv = numpy.diag(1.0 / R1.diagonal())
+            D1 = numpy.diag(R1.diagonal())
+            tmp = numpy.einsum("ii,ij->ij", D1inv, R1)
+            tmp[:, P1] = tmp[:, range(walkers.nbasis)]
+            T1 = numpy.dot(tmp, T1)
+
+        # G^{-1} = 1+A = 1+QDT = Q (Q^{-1}T^{-1}+D) T
+        # Write D = Db^{-1} Ds
+        # Then G^{-1} = Q Db^{-1}(Db Q^{-1}T^{-1}+Ds) T
+        Db = numpy.zeros(B[spin].shape, B[spin].dtype)
+        Ds = numpy.zeros(B[spin].shape, B[spin].dtype)
+        for i in range(Db.shape[0]):
+            # Split on the magnitude of D itself (Db is still all zeros here), so
+            # large scales go into Db^{-1} and Ds keeps only a unit-modulus factor.
+            absDlcr = abs(D1[i, i])
+            if absDlcr > 1.0:
+                Db[i, i] = 1.0 / absDlcr
+                Ds[i, i] = D1[i, i] / absDlcr
+            else:
+                Db[i, i] = 1.0
+                Ds[i, i] = D1[i, i]
+
+        T1inv = scipy.linalg.inv(T1, check_finite=False)
+        DbQinv = numpy.einsum("ii,ij->ij", Db, Q1.conj().T)
+        # C = (Db Q^{-1}T^{-1}+Ds)
+        C = numpy.dot(DbQinv, T1inv) + Ds
+        Cinv = scipy.linalg.inv(C, check_finite=False)
+
+        # Then G = T^{-1} C^{-1} Db Q^{-1}
+        # Q is unitary.
+        G = numpy.dot(numpy.dot(T1inv, Cinv), DbQinv)
+        if inplace:
+            if spin == 0:
+                walkers.Ga[iw] = G
+            else:
+                walkers.Gb[iw] = G
+        elif spin == 0:
+            Ga_iw = G
+        else:
+            Gb_iw = G
+
+    return Ga_iw, Gb_iw
diff --git a/ipie/addons/thermal/estimators/handler.py b/ipie/addons/thermal/estimators/handler.py
new file mode 100644
index 00000000..87bc5c9e
--- /dev/null
+++ b/ipie/addons/thermal/estimators/handler.py
@@ -0,0 +1,172 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+"""Routines and classes for estimation of observables."""
+
+import os
+from typing import Tuple, Union
+
+from ipie.addons.thermal.estimators.energy import ThermalEnergyEstimator
+from ipie.addons.thermal.estimators.particle_number import ThermalNumberEstimator
+from ipie.config import config, MPI
+from ipie.estimators.handler import EstimatorHandler
+
+# Built-in estimators, keyed by the observable names ThermalEstimatorHandler accepts.
+_predefined_estimators = {
+    "energy": ThermalEnergyEstimator,
+    "nav": ThermalNumberEstimator,
+}
+
+
+class ThermalEstimatorHandler(EstimatorHandler):
+    """Container for qmc options of observables.
+
+    Parameters
+    ----------
+    comm : MPI.COMM_WORLD
+        MPI Communicator.
+    hamiltonian : :class:`ipie.hamiltonian.X` object
+        Hamiltonian describing the system.
+    trial : :class:`ipie.trial_wavefunction.X` object
+        Trial wavefunction class.
+    walker_state : :class:`WalkerAccumulator` object
+        WalkerAccumulator class.
+    verbose : bool
+        If true we print out additional setup information.
+    filename : str
+        .h5 file name for saving data.
+    basename : str
+        .h5 base name for saving data.
+    overwrite : bool
+        Whether to overwrite .h5 files.
+    observables : tuple
+        Tuple listing observables to be calculated.
+
+    Attributes
+    ----------
+    estimators : dict
+        Dictionary of estimator objects.
+    """
+
+    def __init__(
+        self,
+        comm,
+        hamiltonian,
+        trial,
+        walker_state=None,
+        verbose: bool = False,
+        filename: Union[str, None] = None,
+        basename: str = "estimates",
+        overwrite=True,
+        observables: Tuple[str] = ("energy", "nav"),  # TODO: Use factory method!
+    ):
+        if verbose:
+            print("# Setting up estimator object.")
+        if comm.rank == 0:
+            self.basename = basename
+            self.filename = filename
+            self.index = 0
+            if self.filename is None:
+                self.filename = f"{self.basename}.{self.index}.h5"
+                while os.path.isfile(self.filename) and not overwrite:
+                    self.index = int(self.filename.split(".")[1])
+                    self.index = self.index + 1
+                    self.filename = f"{self.basename}.{self.index}.h5"
+            if verbose:
+                print(f"# Writing estimator data to {self.filename}")
+        else:
+            self.filename = None
+        self.buffer_size = config.get_option("estimator_buffer_size")
+        if walker_state is not None:
+            self.num_walker_props = walker_state.size
+            self.walker_header = walker_state.names
+        else:
+            self.num_walker_props = 0
+            self.walker_header = ""
+        self._estimators = {}
+        self._shapes = []
+        self._offsets = {}
+        self.json_string = "{}"
+        # TODO: Replace this, should be built outside
+        for obs in observables:
+            try:
+                estimator_cls = _predefined_estimators[obs]
+            except KeyError as err:
+                raise RuntimeError(f"unknown observable: {obs}") from err
+            self[obs] = estimator_cls(hamiltonian=hamiltonian, trial=trial)
+        if verbose:
+            print("# Finished settting up estimator object.")
+
+    def compute_estimators(self, system=None, hamiltonian=None, trial=None, walker_batch=None):
+        """Update estimators with batched walkers.
+
+        Parameters
+        ----------
+        hamiltonian : :class:`ipie.hamiltonian.X` object
+            Hamiltonian describing the system.
+        trial : :class:`ipie.trial_wavefunction.X` object
+            Trial wavefunction class.
+        walker_batch : :class:`UHFThermalWalkers` object
+            Walkers class.
+        """
+        # Compute all estimators
+        # For the moment only consider estimators compute per block.
+        # TODO: generalize for different block groups (loop over groups)
+        offset = self.num_walker_props
+        for k, e in self.items():
+            e.compute_estimator(walkers=walker_batch, hamiltonian=hamiltonian, trial=trial)
+            start = offset + self.get_offset(k)
+            end = start + int(self[k].size)
+            self.local_estimates[start:end] += e.data
+
+    def print_time_slice(self, comm, time_slice, walker_state):
+        """Print estimators at a time slice of the imaginary time propagation.
+
+        Parameters
+        ----------
+        comm : MPI.COMM_WORLD
+            MPI Communicator.
+        time_slice : int
+            Time slice.
+        walker_state : :class:`WalkerAccumulator` object
+            WalkerAccumulator class.
+        """
+        comm.Reduce(self.local_estimates, self.global_estimates, op=MPI.SUM)
+        # Get walker data.
+        offset = walker_state.size
+
+        if comm.rank == 0:
+            k = "energy"
+            e = self[k]
+            start = offset + self.get_offset(k)
+            end = start + int(self[k].size)
+            estim_data = self.global_estimates[start:end]
+            e.post_reduce_hook(estim_data)
+            etotal = estim_data[e.get_index("ETotal")]
+
+            k = "nav"
+            e = self[k]
+            start = offset + self.get_offset(k)
+            end = start + int(self[k].size)
+            estim_data = self.global_estimates[start:end]
+            e.post_reduce_hook(estim_data)
+            nav = estim_data[e.get_index("Nav")]
+
+            print(f"cut : {time_slice} {nav.real} {etotal.real}")
+
+        self.zero()
diff --git a/ipie/addons/thermal/estimators/local_energy.py b/ipie/addons/thermal/estimators/local_energy.py
new file mode 100644
index 00000000..5b57ae78
--- /dev/null
+++ b/ipie/addons/thermal/estimators/local_energy.py
@@ -0,0 +1,55 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+from typing import Union
+
+from ipie.hamiltonians.generic import GenericComplexChol, GenericRealChol
+from ipie.addons.thermal.trial.one_body import OneBody
+from ipie.addons.thermal.trial.mean_field import MeanField
+from ipie.addons.thermal.estimators.generic import local_energy_generic_cholesky
+from ipie.addons.thermal.estimators.thermal import one_rdm_from_G
+
+
+def local_energy_from_density_matrix(
+    hamiltonian: Union[GenericRealChol, GenericComplexChol],
+    trial: Union[OneBody, MeanField],
+    P: numpy.ndarray,
+):
+    """Compute local energy from a given density matrix P.
+
+    Parameters
+    ----------
+    hamiltonian : hamiltonian object
+        Hamiltonian being studied.
+    trial : trial wavefunction object
+        Trial wavefunction (not used by this function).
+    P : np.ndarray
+        Walker density matrix; must have length 2 along its first axis.
+
+    Returns
+    -------
+    local_energy : tuple / array
+        Total, one-body and two-body energies.
+    """
+    assert len(P) == 2
+    return local_energy_generic_cholesky(hamiltonian, P)
+
+
+def local_energy(hamiltonian, walker, trial):  # local energy from the 1RDM of `walker.G`
+    return local_energy_from_density_matrix(hamiltonian, trial, one_rdm_from_G(walker.G))
diff --git a/ipie/addons/thermal/estimators/particle_number.py b/ipie/addons/thermal/estimators/particle_number.py
new file mode 100644
index 00000000..e5a6a888
--- /dev/null
+++ b/ipie/addons/thermal/estimators/particle_number.py
@@ -0,0 +1,91 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+
+from ipie.addons.thermal.estimators.thermal import one_rdm_from_G
+from ipie.estimators.estimator_base import EstimatorBase
+from ipie.utils.backend import arraylib as xp
+
+
+def particle_number(dmat: numpy.ndarray):
+    """Compute average particle number from the thermal 1RDM.
+
+    Parameters
+    ----------
+    dmat : :class:`numpy.ndarray`
+        Thermal 1RDM; `dmat[0]` and `dmat[1]` are traced separately.
+
+    Returns
+    -------
+    nav : scalar
+        Sum of the traces of `dmat[0]` and `dmat[1]`.
+    """
+    nav = dmat[0].trace() + dmat[1].trace()
+    return nav
+
+
+class ThermalNumberEstimator(EstimatorBase):
+    """Weighted average particle number, stored as numerator, denominator and ratio."""
+
+    def __init__(self, hamiltonian=None, trial=None, filename=None):
+        # Numerator and denominator are stored separately; the buffers are complex
+        # valued so they can be accumulated. `hamiltonian` and `trial` are unused.
+        self._data = {
+            "NavNumer": 0.0j,
+            "NavDenom": 0.0j,
+            "Nav": 0.0j,
+        }
+
+        # We also need to specify the shape of the desired estimator
+        self._shape = (len(self.names),)
+
+        # Index of every entry, plus output options: scalars may be printed to
+        # stdout and / or redirected to a custom (ascii) file path.
+        self._data_index = {k: i for i, k in enumerate(list(self._data.keys()))}
+        self.print_to_stdout = True
+        self.ascii_filename = filename
+
+        # All entries are scalars, so flag this as a scalar valued estimator.
+        self.scalar_estimator = True
+
+    def compute_estimator(self, system=None, walkers=None, hamiltonian=None, trial=None):
+        """Set the weighted particle-number numerator/denominator and return the data."""
+        if walkers is None:
+            raise ValueError("Walkers cannot be none in estimator.")
+        nav_numer = 0.0j  # summed locally so repeated calls never double count
+        for iw in range(walkers.nwalkers):
+            # Want the full Green's function when calculating observables.
+            walkers.calc_greens_function(iw, slice_ix=walkers.stack[iw].nslice)
+            nav_iw = particle_number(one_rdm_from_G(xp.array([walkers.Ga[iw], walkers.Gb[iw]])))
+            nav_numer += walkers.weight[iw] * nav_iw.real
+        self._data["NavNumer"] = nav_numer
+        self._data["NavDenom"] = sum(walkers.weight)
+        return self.data
+
+    def get_index(self, name):
+        index = self._data_index.get(name, None)
+        if index is None:
+            raise RuntimeError(f"Unknown estimator {name}")
+        return index
+
+    def post_reduce_hook(self, data):
+        ix_proj = self._data_index["Nav"]
+        ix_nume = self._data_index["NavNumer"]
+        ix_deno = self._data_index["NavDenom"]
+        data[ix_proj] = data[ix_nume] / data[ix_deno]
diff --git a/ipie/addons/thermal/estimators/tests/__init__.py b/ipie/addons/thermal/estimators/tests/__init__.py
new file mode 100644
index 00000000..e2aed039
--- /dev/null
+++ b/ipie/addons/thermal/estimators/tests/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ipie/addons/thermal/estimators/tests/test_estimators.py b/ipie/addons/thermal/estimators/tests/test_estimators.py
new file mode 100644
index 00000000..e25dc6f7
--- /dev/null
+++ b/ipie/addons/thermal/estimators/tests/test_estimators.py
@@ -0,0 +1,199 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import tempfile
+from typing import Tuple, Union
+
+import numpy
+import pytest
+
+from ipie.addons.thermal.estimators.energy import ThermalEnergyEstimator
+from ipie.addons.thermal.estimators.handler import ThermalEstimatorHandler
+from ipie.addons.thermal.estimators.particle_number import ThermalNumberEstimator
+from ipie.addons.thermal.utils.testing import build_generic_test_case_handlers
+from ipie.config import MPI
+from ipie.hamiltonians.generic import Generic as HamGeneric
+from ipie.hamiltonians.generic import GenericComplexChol, GenericRealChol
+
+# System params.
+nup = 5
+ndown = 5
+nelec = (nup, ndown)
+ne = nup + ndown
+nbasis = 10
+
+# Thermal AFQMC params.
+mu = -10.0
+beta = 0.1
+timestep = 0.01
+nwalkers = 10
+lowrank = False
+
+mf_trial = True
+complex_integrals = False
+debug = True
+verbose = True
+seed = 7
+numpy.random.seed(seed)
+
+
+@pytest.mark.unit
+def test_energy_estimator():
+    """Check ThermalEnergyEstimator on real and complex-typed Cholesky Hamiltonians."""
+    objs = build_generic_test_case_handlers(
+        nelec,
+        nbasis,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        complex_integrals=complex_integrals,
+        debug=debug,
+        seed=seed,
+        verbose=verbose,
+    )
+    trial = objs["trial"]
+    hamiltonian = objs["hamiltonian"]
+    walkers = objs["walkers"]
+
+    assert isinstance(hamiltonian, GenericRealChol)
+    chol = hamiltonian.chol
+
+    # GenericRealChol.
+    re_estim = ThermalEnergyEstimator(hamiltonian=hamiltonian, trial=trial)
+    re_estim.compute_estimator(walkers=walkers, hamiltonian=hamiltonian, trial=trial)
+    assert len(re_estim.names) == 5
+    assert re_estim["ENumer"].real == pytest.approx(24.66552451455761)
+    assert re_estim["ETotal"] == pytest.approx(0.0)
+    tmp = re_estim.data.copy()
+    re_estim.post_reduce_hook(tmp)
+    assert tmp[re_estim.get_index("ETotal")] == pytest.approx(2.4665524514557613)
+    assert re_estim.print_to_stdout
+    assert re_estim.ascii_filename is None
+    assert re_estim.shape == (5,)
+    header = re_estim.header_to_text
+    data_to_text = re_estim.data_to_text(tmp)
+    assert len(data_to_text.split()) == 5
+
+    # GenericComplexChol.
+    cx_chol = numpy.array(chol, dtype=numpy.complex128)
+    cx_hamiltonian = HamGeneric(
+        numpy.array(hamiltonian.H1, dtype=numpy.complex128),
+        cx_chol,
+        hamiltonian.ecore,
+        verbose=False,
+    )
+
+    assert isinstance(cx_hamiltonian, GenericComplexChol)
+
+    cx_estim = ThermalEnergyEstimator(hamiltonian=cx_hamiltonian, trial=trial)
+    cx_estim.compute_estimator(walkers=walkers, hamiltonian=cx_hamiltonian, trial=trial)
+    assert len(cx_estim.names) == 5
+    assert cx_estim["ENumer"].real == pytest.approx(24.66552451455761)
+    assert cx_estim["ETotal"] == pytest.approx(0.0)
+    tmp = cx_estim.data.copy()
+    cx_estim.post_reduce_hook(tmp)
+    assert tmp[cx_estim.get_index("ETotal")] == pytest.approx(2.4665524514557613)
+    assert cx_estim.print_to_stdout
+    assert cx_estim.ascii_filename is None
+    assert cx_estim.shape == (5,)
+    header = cx_estim.header_to_text
+    data_to_text = cx_estim.data_to_text(tmp)
+    assert len(data_to_text.split()) == 5
+
+    numpy.testing.assert_allclose(re_estim.data, cx_estim.data)
+
+
+@pytest.mark.unit
+def test_number_estimator():
+    """Check ThermalNumberEstimator returns ne particles per unit walker weight."""
+    objs = build_generic_test_case_handlers(
+        nelec,
+        nbasis,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        complex_integrals=True,
+        debug=debug,
+        seed=seed,
+        verbose=verbose,
+    )
+    trial = objs["trial"]
+    hamiltonian = objs["hamiltonian"]
+    walkers = objs["walkers"]
+
+    estim = ThermalNumberEstimator(hamiltonian=hamiltonian, trial=trial)
+    estim.compute_estimator(walkers=walkers, hamiltonian=hamiltonian, trial=trial)
+    assert len(estim.names) == 3
+    assert estim["NavNumer"].real == pytest.approx(ne * nwalkers)
+    assert estim["Nav"] == pytest.approx(0.0)
+    tmp = estim.data.copy()
+    estim.post_reduce_hook(tmp)
+    assert tmp[estim.get_index("Nav")] == pytest.approx(ne)
+    assert estim.print_to_stdout
+    assert estim.ascii_filename is None
+    assert estim.shape == (3,)
+    header = estim.header_to_text
+    data_to_text = estim.data_to_text(tmp)
+    assert len(data_to_text.split()) == 3
+
+
+@pytest.mark.unit
+def test_estimator_handler():
+    with tempfile.NamedTemporaryFile() as tmp1, tempfile.NamedTemporaryFile() as tmp2:
+        # Smoke test: register an extra energy estimator on the handler and run one pass.
+        objs = build_generic_test_case_handlers(
+            nelec,
+            nbasis,
+            mu,
+            beta,
+            timestep,
+            nwalkers=nwalkers,
+            lowrank=lowrank,
+            mf_trial=mf_trial,
+            complex_integrals=True,
+            debug=debug,
+            seed=seed,
+            verbose=verbose,
+        )
+        trial = objs["trial"]
+        hamiltonian = objs["hamiltonian"]
+        walkers = objs["walkers"]
+
+        estim = ThermalEnergyEstimator(hamiltonian=hamiltonian, trial=trial, filename=tmp1.name)
+        estim.print_to_stdout = False
+
+        comm = MPI.COMM_WORLD
+        handler = ThermalEstimatorHandler(
+            comm, hamiltonian, trial, observables=("energy",), filename=tmp2.name
+        )
+        handler["energy1"] = estim
+        handler.json_string = ""
+        handler.initialize(comm)
+        handler.compute_estimators(hamiltonian=hamiltonian, trial=trial, walker_batch=walkers)
+
+
+if __name__ == "__main__":
+    test_energy_estimator()
+    test_number_estimator()
+    test_estimator_handler()
diff --git a/ipie/addons/thermal/estimators/tests/test_generic.py b/ipie/addons/thermal/estimators/tests/test_generic.py
new file mode 100644
index 00000000..50eb221c
--- /dev/null
+++ b/ipie/addons/thermal/estimators/tests/test_generic.py
@@ -0,0 +1,114 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+import pytest
+from typing import Tuple, Union
+
+try:
+    from ipie.addons.thermal.utils.legacy_testing import build_legacy_generic_test_case_handlers
+
+    _no_cython = False
+
+except ModuleNotFoundError:
+    _no_cython = True
+
+from ipie.config import MPI
+from ipie.addons.thermal.estimators.thermal import one_rdm_from_G
+from ipie.addons.thermal.estimators.generic import local_energy_generic_cholesky
+from ipie.addons.thermal.utils.testing import build_generic_test_case_handlers
+
+from ipie.legacy.estimators.thermal import one_rdm_from_G as legacy_one_rdm_from_G
+from ipie.legacy.estimators.generic import (
+    local_energy_generic_cholesky as legacy_local_energy_generic_cholesky,
+)
+
+comm = MPI.COMM_WORLD
+
+
+@pytest.mark.skipif(_no_cython, reason="Need to build cython modules.")
+@pytest.mark.unit
+def test_local_energy_cholesky(mf_trial=True):
+    """Compare trial G, 1RDM and local energy with legacy; ``mf_trial`` is now honoured."""
+    # System params.
+    nup = 5
+    ndown = 5
+    nelec = (nup, ndown)
+    nbasis = 10
+
+    # Thermal AFQMC params.
+    mu = -10.0
+    beta = 0.1
+    timestep = 0.01
+    nwalkers = 12
+    lowrank = False
+
+    complex_integrals = False
+    debug = True
+    verbose = True
+    seed = 7
+    numpy.random.seed(seed)
+
+    # Test.
+    objs = build_generic_test_case_handlers(
+        nelec,
+        nbasis,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        complex_integrals=complex_integrals,
+        debug=debug,
+        seed=seed,
+        verbose=verbose,
+    )
+    trial = objs["trial"]
+    hamiltonian = objs["hamiltonian"]
+    P = one_rdm_from_G(trial.G)
+    eloc = local_energy_generic_cholesky(hamiltonian, P)
+
+    # Legacy.
+    legacy_objs = build_legacy_generic_test_case_handlers(
+        hamiltonian,
+        comm,
+        nelec,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        seed=seed,
+        verbose=verbose,
+    )
+    legacy_system = legacy_objs["system"]
+    legacy_trial = legacy_objs["trial"]
+    legacy_hamiltonian = legacy_objs["hamiltonian"]
+
+    legacy_P = legacy_one_rdm_from_G(legacy_trial.G)
+    legacy_eloc = legacy_local_energy_generic_cholesky(legacy_system, legacy_hamiltonian, legacy_P)
+
+    numpy.testing.assert_allclose(trial.G, legacy_trial.G, atol=1e-10)
+    numpy.testing.assert_allclose(P, legacy_P, atol=1e-10)
+    numpy.testing.assert_allclose(eloc, legacy_eloc, atol=1e-10)
+
+
+if __name__ == "__main__":
+    test_local_energy_cholesky(mf_trial=True)
diff --git a/ipie/addons/thermal/estimators/tests/test_generic_complex.py b/ipie/addons/thermal/estimators/tests/test_generic_complex.py
new file mode 100644
index 00000000..7de7e36a
--- /dev/null
+++ b/ipie/addons/thermal/estimators/tests/test_generic_complex.py
@@ -0,0 +1,147 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+import pytest
+from typing import Tuple, Union
+
+from ipie.hamiltonians.generic import Generic as HamGeneric
+from ipie.hamiltonians.generic import GenericRealChol, GenericComplexChol
+from ipie.addons.thermal.utils.testing import build_generic_test_case_handlers
+from ipie.addons.thermal.estimators.generic import local_energy_generic_cholesky
+from ipie.addons.thermal.estimators.thermal import one_rdm_from_G
+
+# System params.
+nup = 5
+ndown = 5
+nelec = (nup, ndown)
+nbasis = 10
+
+# Thermal AFQMC params.
+mu = -10.0
+beta = 0.1
+timestep = 0.01
+nwalkers = 12
+lowrank = False
+
+mf_trial = True
+complex_integrals = False
+debug = True
+verbose = True
+seed = 7
+numpy.random.seed(seed)
+
+
+@pytest.mark.unit
+def test_local_energy_vs_real():
+    """Real and complex-typed copies of one Hamiltonian must give equal local energies."""
+    objs = build_generic_test_case_handlers(
+        nelec,
+        nbasis,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        complex_integrals=complex_integrals,
+        debug=debug,
+        seed=seed,
+        verbose=verbose,
+    )
+    trial = objs["trial"]
+    walkers = objs["walkers"]
+    hamiltonian = objs["hamiltonian"]
+
+    assert isinstance(hamiltonian, GenericRealChol)
+
+    chol = hamiltonian.chol
+    cx_chol = numpy.array(chol, dtype=numpy.complex128)
+    cx_hamiltonian = HamGeneric(
+        numpy.array(hamiltonian.H1, dtype=numpy.complex128),
+        cx_chol,
+        hamiltonian.ecore,
+        verbose=False,
+    )
+
+    assert isinstance(cx_hamiltonian, GenericComplexChol)
+
+    for iw in range(walkers.nwalkers):
+        P = one_rdm_from_G(numpy.array([walkers.Ga[iw], walkers.Gb[iw]]))
+        energy = local_energy_generic_cholesky(hamiltonian, P)
+        cx_energy = local_energy_generic_cholesky(cx_hamiltonian, P)
+        numpy.testing.assert_allclose(energy, cx_energy, atol=1e-10)
+
+
+@pytest.mark.unit
+def test_local_energy_vs_eri():
+    """Compare the Cholesky local energy with explicit ERI contractions, term by term."""
+    objs = build_generic_test_case_handlers(
+        nelec,
+        nbasis,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        debug=debug,
+        complex_integrals=True,
+        with_eri=True,
+        seed=seed,
+        verbose=verbose,
+    )
+    trial = objs["trial"]
+    walkers = objs["walkers"]
+    hamiltonian = objs["hamiltonian"]
+    assert isinstance(hamiltonian, GenericComplexChol)
+    eri = objs["eri"].reshape(nbasis, nbasis, nbasis, nbasis)
+
+    chol = hamiltonian.chol.copy()
+    nchol = chol.shape[1]
+    chol = chol.reshape(nbasis, nbasis, nchol)
+
+    # Check if chol and eri are consistent.
+    eri_chol = numpy.einsum("mnx,slx->mnls", chol, chol.conj())
+    numpy.testing.assert_allclose(eri, eri_chol, atol=1e-10)
+
+    for iw in range(walkers.nwalkers):
+        P = one_rdm_from_G(numpy.array([walkers.Ga[iw], walkers.Gb[iw]]))
+        Pa, Pb = P
+        Ptot = Pa + Pb
+        etot, e1, e2 = local_energy_generic_cholesky(hamiltonian, P)
+
+        # Test 1-body term.
+        h1e = hamiltonian.H1[0]
+        e1ref = numpy.einsum("ij,ij->", h1e, Ptot)
+        numpy.testing.assert_allclose(e1, e1ref, atol=1e-10)
+
+        # Test 2-body term: Coulomb minus same-spin exchange.
+        ecoul = 0.5 * numpy.einsum("ijkl,ij,kl->", eri, Ptot, Ptot)
+        exx = -0.5 * numpy.einsum("ijkl,il,kj->", eri, Pa, Pa)
+        exx -= 0.5 * numpy.einsum("ijkl,il,kj->", eri, Pb, Pb)
+        e2ref = ecoul + exx
+        numpy.testing.assert_allclose(e2, e2ref, atol=1e-10)
+
+        etotref = e1ref + e2ref
+        numpy.testing.assert_allclose(etot, etotref, atol=1e-10)
+
+
+if __name__ == "__main__":
+    test_local_energy_vs_real()
+    test_local_energy_vs_eri()
diff --git a/ipie/addons/thermal/estimators/thermal.py b/ipie/addons/thermal/estimators/thermal.py
new file mode 100644
index 00000000..b478c1ed
--- /dev/null
+++ b/ipie/addons/thermal/estimators/thermal.py
@@ -0,0 +1,89 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+import scipy.linalg
+
+
+def one_rdm_from_G(G):
+    r"""Build the spin-resolved one-particle reduced density matrix from G.
+
+    .. math::
+        \rho_{ij} = \langle c_{i}^{\dagger} c_{j} \rangle = \delta_{ij} - G_{ji}
+
+    Parameters
+    ----------
+    G : :class:`numpy.ndarray`
+        Thermal Green's function; ``G[0]`` and ``G[1]`` are the two spin blocks.
+
+    Returns
+    -------
+    P : :class:`numpy.ndarray`
+        Thermal 1RDM as a complex128 array stacking both spin blocks.
+    """
+    eye = numpy.identity(G.shape[-1])
+    return numpy.array([eye - G[s].T for s in (0, 1)], dtype=numpy.complex128)
+
+
+def one_rdm_stable(BT, num_slices):
+    """Stably form the 1RDM for A = BT**num_slices via pivoted QR (G = (1 + A)^{-1})."""
+    nbasis = BT.shape[-1]
+    G = []
+    for spin in [0, 1]:
+        # Need to construct the product A(l) = B_l B_{l-1}..B_L...B_{l+1} in
+        # stable way. Iteratively construct column pivoted QR decompositions
+        # (A = QDT) starting from the rightmost (product of) propagator(s).
+        (Q1, R1, P1) = scipy.linalg.qr(BT[spin], pivoting=True, check_finite=False)
+        # Form D matrices
+        D1 = numpy.diag(R1.diagonal())
+        D1inv = numpy.diag(1.0 / R1.diagonal())
+        T1 = numpy.einsum("ii,ij->ij", D1inv, R1)
+        # permute them
+        T1[:, P1] = T1[:, range(nbasis)]
+
+        for i in range(0, num_slices - 1):
+            C2 = numpy.dot(numpy.dot(BT[spin], Q1), D1)
+            (Q1, R1, P1) = scipy.linalg.qr(C2, pivoting=True, check_finite=False)
+            # Compute D matrices
+            D1inv = numpy.diag(1.0 / R1.diagonal())
+            D1 = numpy.diag(R1.diagonal())
+            tmp = numpy.einsum("ii,ij->ij", D1inv, R1)
+            tmp[:, P1] = tmp[:, range(nbasis)]
+            T1 = numpy.dot(tmp, T1)
+        # G^{-1} = 1+A = 1+QDT = Q (Q^{-1}T^{-1}+D) T
+        # Write D = Db^{-1} Ds
+        # Then G^{-1} = Q Db^{-1}(Db Q^{-1}T^{-1}+Ds) T
+        Db = numpy.zeros(BT[spin].shape, BT[spin].dtype)
+        Ds = numpy.zeros(BT[spin].shape, BT[spin].dtype)
+        for i in range(Db.shape[0]):
+            absDlcr = abs(D1[i, i])  # Split on |D_ii|; Db is still all zeros at this point.
+            if absDlcr > 1.0:
+                Db[i, i] = 1.0 / absDlcr
+                Ds[i, i] = D1[i, i] / absDlcr  # Unit-modulus factor, valid for complex D too.
+            else:
+                Db[i, i] = 1.0
+                Ds[i, i] = D1[i, i]
+
+        T1inv = scipy.linalg.inv(T1, check_finite=False)
+        # C = (Db Q^{-1}T^{-1}+Ds)
+        C = numpy.dot(numpy.einsum("ii,ij->ij", Db, Q1.conj().T), T1inv) + Ds
+        Cinv = scipy.linalg.inv(C, check_finite=False)
+
+        # Then G = T^{-1} C^{-1} Db Q^{-1}; Q is unitary, so Q^{-1} = Q^H.
+        G.append(numpy.dot(numpy.dot(T1inv, Cinv), numpy.einsum("ii,ij->ij", Db, Q1.conj().T)))
+    return one_rdm_from_G(numpy.array(G))
diff --git a/ipie/addons/thermal/propagation/__init__.py b/ipie/addons/thermal/propagation/__init__.py
new file mode 100644
index 00000000..f91ef518
--- /dev/null
+++ b/ipie/addons/thermal/propagation/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
diff --git a/ipie/addons/thermal/propagation/force_bias.py b/ipie/addons/thermal/propagation/force_bias.py
new file mode 100644
index 00000000..815f8aa4
--- /dev/null
+++ b/ipie/addons/thermal/propagation/force_bias.py
@@ -0,0 +1,72 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import plum
+import numpy
+
+from ipie.hamiltonians.generic import GenericRealChol, GenericComplexChol
+from ipie.addons.thermal.estimators.thermal import one_rdm_from_G
+from ipie.utils.backend import arraylib as xp
+
+
+@plum.dispatch
+def construct_force_bias(hamiltonian: GenericRealChol, walkers):
+    r"""Compute the force bias of every walker (real Cholesky vectors).
+
+    Parameters
+    ----------
+    hamiltonian : GenericRealChol
+        Provides ``chol`` and ``nchol``.
+    walkers : walker class
+        Provides ``nwalkers`` and the per-walker ``Ga`` / ``Gb``.
+
+    Returns
+    -------
+    vbias : array
+        Force bias, shape (nwalkers, nchol).
+    """
+    vbias = xp.empty((walkers.nwalkers, hamiltonian.nchol), dtype=walkers.Ga.dtype)
+    for iw in range(walkers.nwalkers):
+        Pa, Pb = one_rdm_from_G(numpy.array([walkers.Ga[iw], walkers.Gb[iw]]))
+        vbias[iw] = hamiltonian.chol.T.dot(Pa.ravel()) + hamiltonian.chol.T.dot(Pb.ravel())
+    return vbias
+
+
+@plum.dispatch
+def construct_force_bias(hamiltonian: GenericComplexChol, walkers):
+    r"""Compute the force bias of every walker (complex Cholesky vectors).
+
+    Parameters
+    ----------
+    hamiltonian : GenericComplexChol
+        Provides ``A``, ``B``, ``nchol`` and ``nfields``.
+    walkers : walker class
+        Provides ``nwalkers`` and the per-walker ``Ga`` / ``Gb``.
+
+    Returns
+    -------
+    vbias : array
+        Force bias, shape (nwalkers, nfields): A-part first, then B-part.
+    """
+    nchol = hamiltonian.nchol
+    vbias = xp.empty((walkers.nwalkers, hamiltonian.nfields), dtype=walkers.Ga.dtype)
+    for iw in range(walkers.nwalkers):
+        Pa, Pb = one_rdm_from_G(numpy.array([walkers.Ga[iw], walkers.Gb[iw]]))
+        vbias[iw, :nchol] = hamiltonian.A.T.dot(Pa.ravel()) + hamiltonian.A.T.dot(Pb.ravel())
+        vbias[iw, nchol:] = hamiltonian.B.T.dot(Pa.ravel()) + hamiltonian.B.T.dot(Pb.ravel())
+    return vbias
diff --git a/ipie/addons/thermal/propagation/operations.py b/ipie/addons/thermal/propagation/operations.py
new file mode 100644
index 00000000..90b311fd
--- /dev/null
+++ b/ipie/addons/thermal/propagation/operations.py
@@ -0,0 +1,46 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+from ipie.utils.backend import arraylib as xp
+
+
+def apply_exponential(VHS, exp_nmax):
+    """Approximate exp(VHS) by its Taylor series truncated at order exp_nmax.
+
+    Parameters
+    ----------
+    VHS : numpy array
+        HS transformation potential
+    exp_nmax : int
+        Highest power of VHS kept in the expansion.
+
+    Returns
+    -------
+    phi : numpy array
+        Sum of VHS^n / n! for n = 0, ..., exp_nmax.
+    """
+    # Running sum, seeded with the zeroth-order (identity) term.
+    phi = xp.identity(VHS.shape[-1], dtype=xp.complex128)
+    term = xp.zeros(phi.shape, dtype=phi.dtype)
+    xp.copyto(term, phi)
+
+    for order in range(1, exp_nmax + 1):
+        term = VHS.dot(term) / order
+        phi += term
+
+    return phi  # Shape (nbasis, nbasis).
diff --git a/ipie/addons/thermal/propagation/phaseless_base.py b/ipie/addons/thermal/propagation/phaseless_base.py
new file mode 100644
index 00000000..42578482
--- /dev/null
+++ b/ipie/addons/thermal/propagation/phaseless_base.py
@@ -0,0 +1,344 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import time
+import plum
+import math
+import cmath
+import numpy
+import scipy.linalg
+
+from abc import abstractmethod
+from ipie.utils.backend import arraylib as xp
+from ipie.propagation.continuous_base import ContinuousBase
+from ipie.propagation.operations import apply_exponential
+from ipie.hamiltonians.generic import GenericRealChol, GenericComplexChol
+
+from ipie.addons.thermal.estimators.thermal import one_rdm_from_G
+from ipie.addons.thermal.propagation.force_bias import construct_force_bias
+
+# TODO: Add lowrank implementation. See: https://github.com/JoonhoLee-Group/ipie/issues/302
+# Ref: 10.1103/PhysRevB.80.214116 for bounds.
+
+
+@plum.dispatch
+def construct_mean_field_shift(hamiltonian: GenericRealChol, trial):
+    r"""Mean-field shift from the trial 1RDM (real Cholesky vectors), times 1j.
+
+    .. math::
+
+        \bar{v}_n = \sum_{ik\sigma} v_{(ik),n} P_{ik\sigma}
+
+    """
+    # hamiltonian.chol has shape (nbasis^2, nchol).
+    Pa, Pb = one_rdm_from_G(trial.G)
+    Ptot = (Pa + Pb).ravel()
+    # Contract real and imaginary parts separately: 1j * (re + 1j * im) = 1j * re - im.
+    re_part = numpy.dot(hamiltonian.chol.T, Ptot.real)
+    im_part = numpy.dot(hamiltonian.chol.T, Ptot.imag)
+    return 1.0j * re_part - im_part  # Shape (nchol,).
+
+
+@plum.dispatch
+def construct_mean_field_shift(hamiltonian: GenericComplexChol, trial):
+    r"""Mean-field shift from the trial 1RDM (complex Cholesky vectors), times 1j.
+
+    .. math::
+
+        \bar{v}_n = \sum_{ik\sigma} v_{(ik),n} P_{ik\sigma}
+
+    """
+    # Fields are ordered A-part first ([:nchol]), then B-part ([nchol:]).
+    Pa, Pb = one_rdm_from_G(trial.G)
+    Ptot = (Pa + Pb).ravel()
+    nchol = hamiltonian.nchol
+    mf_shift = numpy.zeros(hamiltonian.nfields, dtype=hamiltonian.chol.dtype)
+    mf_shift[:nchol] = 1j * numpy.dot(hamiltonian.A.T, Ptot)
+    mf_shift[nchol:] = 1j * numpy.dot(hamiltonian.B.T, Ptot)
+    return mf_shift  # Shape (nfields,).
+
+
+class PhaselessBase(ContinuousBase):
+    """A base class for generic continuous HS transform FT-AFQMC propagators."""
+
+    def __init__(self, timestep, mu, lowrank=False, exp_nmax=6, verbose=False):
+        """Store the chemical potential, options and timestep-derived constants."""
+        super().__init__(timestep, verbose=verbose)
+        self.mu = mu
+        self.lowrank = lowrank
+        self.exp_nmax = exp_nmax
+        self.mpi_handler = None
+        self.sqrt_dt = self.dt**0.5
+        self.isqrt_dt = 1j * self.sqrt_dt
+        self.nfb_trig = 0  # number of force bias triggered
+        self.ebound = (2.0 / self.dt) ** 0.5  # energy bound range
+        self.fbbound = 1.0
+
+    def build(self, hamiltonian, trial=None, walkers=None, mpi_handler=None, verbose=False):
+        """Precompute mean-field shift, one-body propagator and constant energy factor."""
+        start = time.time()
+        mf_shift = construct_mean_field_shift(hamiltonian, trial)
+        self.mf_shift = mf_shift
+        if verbose:
+            print(f"# Time to mean field shift: {time.time() - start} s")
+            print(
+                "# Absolute value of maximum component of mean field shift: "
+                "{:13.8e}.".format(numpy.max(numpy.abs(mf_shift)))
+            )
+
+        # Half-timestep one-body propagator; it reads self.mf_shift set above.
+        self.BH1 = self.construct_one_body_propagator(hamiltonian)
+        # Force bias is not allocated here; it is assigned when first constructed.
+        self.vbias = None
+
+        # Legacy attributes.
+        mf_core = hamiltonian.ecore + 0.5 * numpy.dot(mf_shift, mf_shift)
+        self.mf_core = mf_core
+        self.mf_const_fac = cmath.exp(-self.dt * mf_core)
+
+    @plum.dispatch
+    def construct_one_body_propagator(self, hamiltonian: GenericRealChol):
+        r"""Construct the mean-field shifted one-body propagator (mu included).
+
+        .. math::
+
+            H1 \rightarrow H1 - v0
+            v0_{ik} = \sum_n v_{(ik),n} \bar{v}_n
+
+        Parameters
+        ----------
+        hamiltonian : hamiltonian class
+            Generic hamiltonian object.
+
+        Returns
+        -------
+        expH1 : :class:`numpy.ndarray`
+            exp(-dt/2 * H1) for each of the two spin blocks, stacked.
+        """
+        nb = hamiltonian.nbasis
+        shift = 1j * numpy.einsum("mx,x->m", hamiltonian.chol, self.mf_shift).reshape(nb, nb)
+        one_body_shift = shift + self.mu * numpy.identity(nb, dtype=hamiltonian.H1.dtype)
+        H1 = hamiltonian.h1e_mod - numpy.array([one_body_shift, one_body_shift])
+        return numpy.array([scipy.linalg.expm(-0.5 * self.dt * H1[spin]) for spin in (0, 1)])
+
+    @plum.dispatch
+    def construct_one_body_propagator(self, hamiltonian: GenericComplexChol):
+        r"""Construct the mean-field shifted one-body propagator (mu included).
+
+        .. math::
+
+            H1 \rightarrow H1 - v0
+            v0_{ik} = \sum_n v_{(ik),n} \bar{v}_n
+
+        Parameters
+        ----------
+        hamiltonian : hamiltonian class
+            Generic hamiltonian object.
+
+        Returns
+        -------
+        expH1 : :class:`numpy.ndarray`
+            exp(-dt/2 * H1) for each of the two spin blocks, stacked.
+        """
+        nb, nchol = hamiltonian.nbasis, hamiltonian.nchol
+        shift = 1j * numpy.einsum("mx,x->m", hamiltonian.A, self.mf_shift[:nchol]).reshape(nb, nb)
+        shift += 1j * numpy.einsum("mx,x->m", hamiltonian.B, self.mf_shift[nchol:]).reshape(nb, nb)
+        muN = self.mu * numpy.identity(nb, dtype=hamiltonian.H1.dtype)
+        H1 = hamiltonian.h1e_mod - numpy.array([shift + muN, shift + muN])
+        return numpy.array(
+            [scipy.linalg.expm(-0.5 * self.dt * H1[0]), scipy.linalg.expm(-0.5 * self.dt * H1[1])]
+        )
+
+    def construct_two_body_propagator(self, walkers, hamiltonian, trial, debug=False):
+        r"""Construct two-body propagator.
+
+        .. math::
+            \bar{x}_n &= \sqrt{\Delta\tau} \bar{v}_n \\
+            x_{\mathrm{shifted},n} &= x_n - \bar{x}_n \\
+            C_{MF} &= -\sqrt{\Delta\tau} \sum_{n} x_{\mathrm{shifted},n} \bar{v}_n \\
+            &= -\sqrt{\Delta\tau} \sum_{n} (x_n - \sqrt{\Delta\tau} \bar{v}_n) \bar{v}_n \\
+            &= -\sqrt{\Delta\tau} \sum_{n} x_n \bar{v}_n + \Delta\tau \sum_{n} \bar{v}_n^2.
+
+        Parameters
+        ----------
+        walkers: walker class
+            UHFThermalWalkers object.
+        hamiltonian : hamiltonian class
+            Generic hamiltonian object.
+        trial : trial class
+            Trial density matrix.
+        debug : bool
+            If True, keep the sampled auxiliary fields on ``self.xi``.
+        """
+        # Optimal force bias
+        start_time = time.time()
+        self.vbias = construct_force_bias(hamiltonian, walkers)
+        xbar = -self.sqrt_dt * (1j * self.vbias - self.mf_shift)
+        self.timer.tfbias += time.time() - start_time
+
+        # Force bias bounding
+        xbar = self.apply_bound_force_bias(xbar, self.fbbound)
+
+        # Normally distributed auxiliary fields.
+        xi = xp.random.normal(0.0, 1.0, hamiltonian.nfields * walkers.nwalkers).reshape(
+            walkers.nwalkers, hamiltonian.nfields
+        )
+        if debug:
+            self.xi = xi  # For debugging.
+        xshifted = xi - xbar  # Shape (nwalkers, nfields).
+
+        # Constant factor arising from force bias and mean field shift
+        cmf = -self.sqrt_dt * xp.einsum("wx,x->w", xshifted, self.mf_shift)  # Shape (nwalkers,).
+        # Constant factor arising from shifting the probability distribution.
+        cfb = xp.einsum("wx,wx->w", xi, xbar) - 0.5 * xp.einsum(
+            "wx,wx->w", xbar, xbar
+        )  # Shape (nwalkers,).
+
+        xshifted = xshifted.T.copy()  # Shape (nfields, nwalkers).
+        VHS = self.construct_VHS(hamiltonian, xshifted)  # Shape (nwalkers, nbasis, nbasis).
+        return cmf, cfb, xshifted, VHS
+
+    def propagate_walkers_one_body(self, walkers):
+        pass
+
+    def propagate_walkers_two_body(self, walkers, hamiltonian, trial):
+        pass
+
+    def propagate_walkers(self, walkers, hamiltonian, trial, eshift=0.0, debug=False):
+        start_time = time.time()  # Times the two-body (VHS) construction below.
+        cmf, cfb, xshifted, VHS = self.construct_two_body_propagator(
+            walkers, hamiltonian, trial, debug=debug
+        )
+        assert walkers.nwalkers == xshifted.shape[-1]  # xshifted is (nfields, nwalkers).
+        self.timer.tvhs += time.time() - start_time
+        assert len(VHS.shape) == 3  # One (nbasis, nbasis) matrix per walker.
+
+        start_time = time.time()  # NOTE(review): reset in step 1 below before it is read.
+        for iw in range(walkers.nwalkers):
+            stack = walkers.stack[iw]
+            phi = xp.identity(VHS[iw].shape[-1], dtype=xp.complex128)
+            BV = apply_exponential(phi, VHS[iw], self.exp_nmax)  # Shape (nbasis, nbasis).
+            B = numpy.array([BV.dot(self.BH1[0]), BV.dot(self.BH1[1])])  # BV @ BH1[s], s = 0, 1.
+            B = numpy.array([self.BH1[0].dot(B[0]), self.BH1[1].dot(B[1])])  # BH1[s] @ BV @ BH1[s].
+
+            # Compute determinant ratio det(1+A')/det(1+A).
+            # 1. Current walker's Green's function (a copy, taken before the stack changes).
+            tix = stack.nslice
+            start_time = time.time()
+            G = walkers.calc_greens_function(iw, slice_ix=tix, inplace=False)
+            self.timer.tgf += time.time() - start_time
+
+            # 2. Push B onto the stack, then recompute the walker's Green's function in place.
+            start_time = time.time()
+            stack.update_new(B)
+            walkers.calc_greens_function(iw, slice_ix=tix, inplace=True)
+
+            # 3. Compute det(G/G')
+            # Now apply phaseless approximation.
+            # Use legacy thermal weight update for now.
+            self.update_weight_legacy(walkers, iw, G, cfb, cmf, eshift)
+            # self.update_weight(walkers, iw, G, cfb, cmf, eshift)
+
+            self.timer.tupdate += time.time() - start_time  # Covers steps 2 and 3.
+
+    def update_weight(self, walkers, iw, G, cfb, cmf, eshift):
+        """Update weight for walker `iw`; the call in `propagate_walkers` is commented out."""
+        M0a = scipy.linalg.det(G[0], check_finite=False)  # From the pre-update Green's function.
+        M0b = scipy.linalg.det(G[1], check_finite=False)
+        Mnewa = scipy.linalg.det(walkers.Ga[iw], check_finite=False)  # From the stored one.
+        Mnewb = scipy.linalg.det(walkers.Gb[iw], check_finite=False)
+
+        # ovlp = det( G^{-1} )
+        ovlp_ratio = (M0a * M0b) / (Mnewa * Mnewb)  # ovlp_new / ovlp_old
+        hybrid_energy = -(xp.log(ovlp_ratio) + cfb[iw] + cmf[iw]) / self.dt  # Scalar.
+        hybrid_energy = self.apply_bound_hybrid(hybrid_energy, eshift)
+        importance_function = xp.exp(
+            -self.dt * (0.5 * (hybrid_energy + walkers.hybrid_energy) - eshift)
+        )
+
+        # Splitting w_k = |I(x, \bar{x}, |phi_k>)| e^{i theta_k}, where `k`
+        # labels the time slice.
+        magn = xp.abs(importance_function)
+        walkers.hybrid_energy = hybrid_energy  # NOTE(review): not per-walker (`iw`)? Confirm.
+        dtheta = (-self.dt * hybrid_energy - cfb[iw]).imag  # Scalar.
+        cosine_fac = xp.amax([0.0, xp.cos(dtheta)])  # Non-positive cosines give zero weight.
+        walkers.weight[iw] *= magn * cosine_fac
+        walkers.M0a[iw] = Mnewa
+        walkers.M0b[iw] = Mnewb
+
+    def update_weight_legacy(self, walkers, iw, G, cfb, cmf, eshift):
+        """Update weight for walker `iw` using legacy code."""
+        # M0a = walkers.M0a[iw]
+        # M0b = walkers.M0b[iw]
+        M0a = scipy.linalg.det(G[0], check_finite=False)
+        M0b = scipy.linalg.det(G[1], check_finite=False)
+        Mnewa = scipy.linalg.det(walkers.Ga[iw], check_finite=False)
+        Mnewb = scipy.linalg.det(walkers.Gb[iw], check_finite=False)
+        _cfb = cfb[iw]
+        _cmf = cmf[iw]
+
+        try:
+            # Could save M0 rather than recompute.
+            oratio = (M0a * M0b) / (Mnewa * Mnewb)
+            # Might want to cap this at some point.
+            hybrid_energy = cmath.log(oratio) + _cfb + _cmf
+            Q = cmath.exp(hybrid_energy)
+            expQ = self.mf_const_fac * Q
+            (magn, _) = cmath.polar(expQ)
+
+            if not math.isinf(magn):
+                # Determine cosine phase from Arg(det(1+A'(x))/det(1+A(x))).
+                # Note this doesn't include exponential factor from shifting
+                # proability distribution.
+                dtheta = cmath.phase(cmath.exp(hybrid_energy - _cfb))
+                cosine_fac = max(0, math.cos(dtheta))
+                walkers.weight[iw] *= magn * cosine_fac
+                walkers.M0a[iw] = Mnewa
+                walkers.M0b[iw] = Mnewb
+
+            else:
+                walkers.weight[iw] = 0.0
+
+        except ZeroDivisionError:
+            walkers.weight[iw] = 0.0
+
+    def apply_bound_force_bias(self, xbar, max_bound=1.0):
+        absxbar = xp.abs(xbar)
+        idx_to_rescale = absxbar > max_bound
+        nonzeros = absxbar > 1e-13
+        xbar_rescaled = xbar.copy()
+        xbar_rescaled[nonzeros] = xbar_rescaled[nonzeros] / absxbar[nonzeros]
+        xbar = xp.where(idx_to_rescale, xbar_rescaled, xbar)
+        self.nfb_trig += xp.sum(idx_to_rescale)
+        return xbar
+
+    def apply_bound_hybrid(self, ehyb, eshift):  # Shift is a number but ehyb is not
+        # For initial steps until first estimator communication, `eshift` will be
+        # zero and hybrid energy can be incorrect. So just avoid capping for
+        # first block until reasonable estimate of `eshift` can be computed.
+        if abs(eshift) < 1e-10:
+            return ehyb
+
+        emax = eshift.real + self.ebound
+        emin = eshift.real - self.ebound
+        return xp.minimum(emax, xp.maximum(ehyb, emin))
+
+    # Form VHS from the shifted auxiliary fields; subclasses provide the implementation.
+    @abstractmethod
+    def construct_VHS(self, hamiltonian, xshifted):
+        pass
diff --git a/ipie/addons/thermal/propagation/phaseless_generic.py b/ipie/addons/thermal/propagation/phaseless_generic.py
new file mode 100644
index 00000000..b60fd1aa
--- /dev/null
+++ b/ipie/addons/thermal/propagation/phaseless_generic.py
@@ -0,0 +1,50 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import plum
+
+from ipie.hamiltonians.generic import GenericRealChol, GenericComplexChol
+from ipie.addons.thermal.propagation.phaseless_base import PhaselessBase
+from ipie.utils.backend import arraylib as xp
+
+
+class PhaselessGeneric(PhaselessBase):
+    """A class for performing phaseless propagation with real, generic, hamiltonian."""
+
+    def __init__(self, time_step, mu, exp_nmax=6, lowrank=False, verbose=False):
+        super().__init__(time_step, mu, lowrank=lowrank, exp_nmax=exp_nmax, verbose=verbose)
+
+    @plum.dispatch
+    def construct_VHS(self, hamiltonian: GenericRealChol, xshifted: xp.ndarray):
+        """Includes `nwalkers`."""
+        nwalkers = xshifted.shape[-1]  # Shape (nfields, nwalkers).
+        VHS = hamiltonian.chol.dot(xshifted)  # Shape (nbasis^2, nwalkers).
+        VHS = self.isqrt_dt * VHS.T.reshape(nwalkers, hamiltonian.nbasis, hamiltonian.nbasis)
+        return VHS  # Shape (nwalkers, nbasis, nbasis).
+
+    @plum.dispatch
+    def construct_VHS(self, hamiltonian: GenericComplexChol, xshifted: xp.ndarray):
+        """Includes `nwalkers`."""
+        nwalkers = xshifted.shape[-1]
+        nchol = hamiltonian.nchol
+        VHS = self.isqrt_dt * (
+            hamiltonian.A.dot(xshifted[:nchol]) + hamiltonian.B.dot(xshifted[nchol:])
+        )
+        VHS = VHS.T.copy()
+        VHS = VHS.reshape(nwalkers, hamiltonian.nbasis, hamiltonian.nbasis)
+        return VHS
diff --git a/ipie/addons/thermal/propagation/propagator.py b/ipie/addons/thermal/propagation/propagator.py
new file mode 100644
index 00000000..ab9b4600
--- /dev/null
+++ b/ipie/addons/thermal/propagation/propagator.py
@@ -0,0 +1,22 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+from ipie.hamiltonians.generic import GenericRealChol, GenericComplexChol
+from ipie.addons.thermal.propagation.phaseless_generic import PhaselessGeneric
+
+Propagator = {GenericRealChol: PhaselessGeneric, GenericComplexChol: PhaselessGeneric}
diff --git a/ipie/addons/thermal/propagation/tests/__init__.py b/ipie/addons/thermal/propagation/tests/__init__.py
new file mode 100644
index 00000000..f91ef518
--- /dev/null
+++ b/ipie/addons/thermal/propagation/tests/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
diff --git a/ipie/addons/thermal/propagation/tests/test_prop_generic.py b/ipie/addons/thermal/propagation/tests/test_prop_generic.py
new file mode 100644
index 00000000..6674545c
--- /dev/null
+++ b/ipie/addons/thermal/propagation/tests/test_prop_generic.py
@@ -0,0 +1,291 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+import pytest
+
+try:
+    from ipie.addons.thermal.utils.legacy_testing import build_legacy_generic_test_case_handlers
+    from ipie.addons.thermal.utils.legacy_testing import legacy_propagate_walkers
+
+    _no_cython = False
+
+except ModuleNotFoundError:
+    _no_cython = True
+
+from ipie.config import MPI
+from ipie.addons.thermal.estimators.generic import local_energy_generic_cholesky
+from ipie.addons.thermal.estimators.thermal import one_rdm_from_G
+from ipie.addons.thermal.utils.testing import build_generic_test_case_handlers
+
+from ipie.legacy.estimators.generic import (
+    local_energy_generic_cholesky as legacy_local_energy_generic_cholesky,
+)
+from ipie.legacy.estimators.thermal import one_rdm_from_G as legacy_one_rdm_from_G
+
+comm = MPI.COMM_WORLD  # Passed to the legacy test-case builders.
+
+# System params, shared by every test in this module.
+nup = 5
+ndown = 5
+nelec = (nup, ndown)
+nbasis = 10
+
+# Thermal AFQMC params.
+mu = -10.0
+beta = 0.1
+timestep = 0.01
+nwalkers = 12
+nblocks = 12  # Not referenced by the tests in this module.
+lowrank = False
+
+mf_trial = True
+complex_integrals = False
+debug = True
+verbose = True
+seed = 7
+numpy.random.seed(seed)  # Seeds NumPy's global RNG once, at import time.
+
+
+@pytest.mark.skipif(_no_cython, reason="Need to build cython modules.")
+@pytest.mark.unit
+def test_mf_shift():
+    # Objects under test (current thermal implementation).
+    objs = build_generic_test_case_handlers(
+        nelec,
+        nbasis,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        complex_integrals=complex_integrals,
+        debug=debug,
+        seed=seed,
+        verbose=verbose,
+    )
+    hamiltonian = objs["hamiltonian"]
+    propagator = objs["propagator"]
+
+    # Legacy reference objects, built from the same Hamiltonian.
+    legacy_objs = build_legacy_generic_test_case_handlers(
+        hamiltonian,
+        comm,
+        nelec,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        seed=seed,
+        verbose=verbose,
+    )
+    legacy_propagator = legacy_objs["propagator"]
+
+    numpy.testing.assert_almost_equal(
+        legacy_propagator.propagator.mf_shift, propagator.mf_shift, decimal=10
+    )
+
+
+@pytest.mark.skipif(_no_cython, reason="Need to build cython modules.")
+@pytest.mark.unit
+def test_BH1():
+    # Objects under test (current thermal implementation).
+    objs = build_generic_test_case_handlers(
+        nelec,
+        nbasis,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        complex_integrals=complex_integrals,
+        debug=debug,
+        seed=seed,
+        verbose=verbose,
+    )
+    hamiltonian = objs["hamiltonian"]
+    propagator = objs["propagator"]
+
+    # Legacy reference objects, built from the same Hamiltonian.
+    legacy_objs = build_legacy_generic_test_case_handlers(
+        hamiltonian,
+        comm,
+        nelec,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        seed=seed,
+        verbose=verbose,
+    )
+    legacy_propagator = legacy_objs["propagator"]
+
+    numpy.testing.assert_almost_equal(legacy_propagator.propagator.BH1, propagator.BH1, decimal=10)
+
+
+@pytest.mark.skipif(_no_cython, reason="Need to build cython modules.")
+@pytest.mark.unit
+def test_construct_two_body_propagator():
+    # Objects under test (current thermal implementation).
+    objs = build_generic_test_case_handlers(
+        nelec,
+        nbasis,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        complex_integrals=complex_integrals,
+        debug=debug,
+        seed=seed,
+        verbose=verbose,
+    )
+    trial = objs["trial"]
+    hamiltonian = objs["hamiltonian"]
+    walkers = objs["walkers"]
+    propagator = objs["propagator"]
+
+    # Legacy reference objects, built from the same Hamiltonian.
+    legacy_objs = build_legacy_generic_test_case_handlers(
+        hamiltonian,
+        comm,
+        nelec,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        seed=seed,
+        verbose=verbose,
+    )
+    legacy_trial = legacy_objs["trial"]
+    legacy_hamiltonian = legacy_objs["hamiltonian"]
+    legacy_walkers = legacy_objs["walkers"]
+    legacy_propagator = legacy_objs["propagator"]
+
+    cmf, cfb, xshifted, VHS = propagator.construct_two_body_propagator(
+        walkers, hamiltonian, trial, debug=True
+    )
+
+    legacy_cmf = []
+    legacy_cfb = []
+    legacy_xshifted = []
+    legacy_VHS = []
+
+    for iw in range(walkers.nwalkers):  # The legacy routine is called once per walker.
+        _cmf, _cfb, _xshifted, _VHS = legacy_propagator.two_body_propagator(
+            legacy_walkers.walkers[iw], legacy_hamiltonian, legacy_trial, xi=propagator.xi[iw]
+        )
+        legacy_cmf.append(_cmf)
+        legacy_cfb.append(_cfb)
+        legacy_xshifted.append(_xshifted)
+        legacy_VHS.append(_VHS)
+
+    legacy_xshifted = numpy.array(legacy_xshifted).T  # Transposed to compare with `xshifted`.
+
+    numpy.testing.assert_almost_equal(legacy_cmf, cmf, decimal=10)
+    numpy.testing.assert_almost_equal(legacy_cfb, cfb, decimal=10)
+    numpy.testing.assert_almost_equal(legacy_xshifted, xshifted, decimal=10)
+    numpy.testing.assert_almost_equal(legacy_VHS, VHS, decimal=10)
+
+
+@pytest.mark.skipif(_no_cython, reason="Need to build cython modules.")
+@pytest.mark.unit
+def test_phaseless_generic_propagator():
+    # Objects under test (current thermal implementation).
+    objs = build_generic_test_case_handlers(
+        nelec,
+        nbasis,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        complex_integrals=complex_integrals,
+        debug=debug,
+        seed=seed,
+        verbose=verbose,
+    )
+    trial = objs["trial"]
+    hamiltonian = objs["hamiltonian"]
+    walkers = objs["walkers"]
+    propagator = objs["propagator"]
+
+    # Legacy reference objects, built from the same Hamiltonian.
+    legacy_objs = build_legacy_generic_test_case_handlers(
+        hamiltonian,
+        comm,
+        nelec,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        seed=seed,
+        verbose=verbose,
+    )
+    legacy_system = legacy_objs["system"]
+    legacy_trial = legacy_objs["trial"]
+    legacy_hamiltonian = legacy_objs["hamiltonian"]
+    legacy_walkers = legacy_objs["walkers"]
+    legacy_propagator = legacy_objs["propagator"]
+
+    for t in range(walkers.stack[0].nslice):  # Compare first, then propagate both codes.
+        for iw in range(walkers.nwalkers):
+            P = one_rdm_from_G(numpy.array([walkers.Ga[iw], walkers.Gb[iw]]))
+            eloc = local_energy_generic_cholesky(hamiltonian, P)
+
+            legacy_P = legacy_one_rdm_from_G(numpy.array(legacy_walkers.walkers[iw].G))
+            legacy_eloc = legacy_local_energy_generic_cholesky(
+                legacy_system, legacy_hamiltonian, legacy_P
+            )
+
+            numpy.testing.assert_almost_equal(legacy_eloc, eloc, decimal=10)
+            numpy.testing.assert_allclose(legacy_walkers.walkers[iw].G[0], walkers.Ga[iw])
+            numpy.testing.assert_almost_equal(
+                legacy_walkers.walkers[iw].G[1], walkers.Gb[iw], decimal=10
+            )
+            numpy.testing.assert_almost_equal(legacy_P, P, decimal=10)
+            numpy.testing.assert_almost_equal(
+                legacy_walkers.walkers[iw].stack.ovlp[0], walkers.stack[iw].ovlp[0], decimal=10
+            )
+            numpy.testing.assert_almost_equal(
+                legacy_walkers.walkers[iw].stack.ovlp[1], walkers.stack[iw].ovlp[1], decimal=10
+            )
+
+        propagator.propagate_walkers(walkers, hamiltonian, trial, debug=True)  # Sets propagator.xi.
+        legacy_walkers = legacy_propagate_walkers(
+            legacy_hamiltonian, legacy_trial, legacy_walkers, legacy_propagator, xi=propagator.xi
+        )
+
+
+if __name__ == "__main__":  # Direct run: calls the tests without pytest (marks not applied).
+    test_mf_shift()
+    test_BH1()
+    test_construct_two_body_propagator()
+    test_phaseless_generic_propagator()
diff --git a/ipie/addons/thermal/propagation/tests/ueg/test_prop_ueg.py b/ipie/addons/thermal/propagation/tests/ueg/test_prop_ueg.py
new file mode 100644
index 00000000..c773890c
--- /dev/null
+++ b/ipie/addons/thermal/propagation/tests/ueg/test_prop_ueg.py
@@ -0,0 +1,154 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import pytest
+import numpy
+
+try:
+    from ipie.addons.thermal.utils.legacy_testing import build_legacy_ueg_test_case_handlers
+    from ipie.addons.thermal.utils.legacy_testing import legacy_propagate_walkers
+    from ipie.legacy.estimators.ueg import local_energy_ueg as legacy_local_energy_ueg
+
+    _no_cython = False
+
+except ModuleNotFoundError:
+    _no_cython = True
+
+from ipie.config import MPI
+from ipie.addons.thermal.estimators.generic import local_energy_generic_cholesky
+from ipie.addons.thermal.estimators.thermal import one_rdm_from_G
+from ipie.addons.thermal.utils.testing import build_ueg_test_case_handlers
+
+from ipie.legacy.estimators.thermal import one_rdm_from_G as legacy_one_rdm_from_G
+
+comm = MPI.COMM_WORLD
+
+
+@pytest.mark.skipif(_no_cython, reason="Need to build cython modules.")
+@pytest.mark.unit
+def test_phaseless_ueg_propagator():
+    # UEG params.
+    nup = 7
+    ndown = 7
+    nelec = (nup, ndown)
+    rs = 1.0
+    ecut = 1.0
+
+    # Thermal AFQMC params.
+    mu = -1.0
+    beta = 0.1
+    timestep = 0.01
+    nwalkers = 1
+    lowrank = False
+
+    debug = True
+    verbose = False if (comm.rank != 0) else True  # Verbose on rank 0 only.
+    seed = 7
+    numpy.random.seed(seed)
+
+    # Objects under test (current thermal implementation).
+    objs = build_ueg_test_case_handlers(
+        nelec,
+        rs,
+        ecut,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        debug=debug,
+        seed=seed,
+        verbose=verbose,
+    )
+    trial = objs["trial"]
+    hamiltonian = objs["hamiltonian"]
+    walkers = objs["walkers"]
+    propagator = objs["propagator"]
+
+    # Legacy reference objects, built from the same parameters.
+    legacy_objs = build_legacy_ueg_test_case_handlers(
+        comm,
+        nelec,
+        rs,
+        ecut,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        seed=seed,
+        verbose=verbose,
+    )
+    legacy_system = legacy_objs["system"]
+    legacy_trial = legacy_objs["trial"]
+    legacy_hamiltonian = legacy_objs["hamiltonian"]
+    legacy_walkers = legacy_objs["walkers"]
+    legacy_propagator = legacy_objs["propagator"]
+
+    h1e = legacy_hamiltonian.H1[0]
+    eri = legacy_hamiltonian.eri_4()
+
+    for t in range(walkers.stack[0].nslice):  # Compare first, then propagate both codes.
+        for iw in range(walkers.nwalkers):
+            P = one_rdm_from_G(numpy.array([walkers.Ga[iw], walkers.Gb[iw]]))
+            eloc = local_energy_generic_cholesky(hamiltonian, P)
+
+            legacy_P = legacy_one_rdm_from_G(numpy.array(legacy_walkers.walkers[iw].G))
+            legacy_eloc = legacy_local_energy_ueg(legacy_system, legacy_hamiltonian, legacy_P)
+
+            legacy_Pa, legacy_Pb = legacy_P
+            legacy_Ptot = legacy_Pa + legacy_Pb
+            ref_e1 = numpy.einsum("ij,ij->", h1e, legacy_Ptot)
+
+            Ptot = legacy_Ptot  # The reference energy is built from the legacy density matrices.
+            Pa = legacy_Pa
+            Pb = legacy_Pb
+
+            ecoul = 0.5 * numpy.einsum("ijkl,ij,kl->", eri, Ptot, Ptot)
+            exx = -0.5 * numpy.einsum("ijkl,il,kj->", eri, Pa, Pa)
+            exx -= 0.5 * numpy.einsum("ijkl,il,kj->", eri, Pb, Pb)
+            ref_e2 = ecoul + exx
+            ref_eloc = (ref_e1 + ref_e2, ref_e1, ref_e2)  # (total, one-body, two-body).
+
+            numpy.testing.assert_almost_equal(legacy_P, P, decimal=10)
+            numpy.testing.assert_almost_equal(legacy_trial.dmat, trial.dmat, decimal=10)
+            numpy.testing.assert_allclose(eloc, ref_eloc, atol=1e-10)
+            numpy.testing.assert_allclose(legacy_eloc, ref_eloc, atol=1e-10)
+            numpy.testing.assert_almost_equal(legacy_eloc, eloc, decimal=10)
+
+            numpy.testing.assert_almost_equal(
+                legacy_walkers.walkers[iw].G[0], walkers.Ga[iw], decimal=10
+            )
+            numpy.testing.assert_almost_equal(
+                legacy_walkers.walkers[iw].G[1], walkers.Gb[iw], decimal=10
+            )
+            numpy.testing.assert_almost_equal(
+                legacy_walkers.walkers[iw].stack.ovlp[0], walkers.stack[iw].ovlp[0], decimal=10
+            )
+            numpy.testing.assert_almost_equal(
+                legacy_walkers.walkers[iw].stack.ovlp[1], walkers.stack[iw].ovlp[1], decimal=10
+            )
+
+        propagator.propagate_walkers(walkers, hamiltonian, trial, debug=True)  # Sets propagator.xi.
+        legacy_walkers = legacy_propagate_walkers(
+            legacy_hamiltonian, legacy_trial, legacy_walkers, legacy_propagator, xi=propagator.xi
+        )
+
+
+if __name__ == "__main__":  # Direct run: calls the test without pytest (marks not applied).
+    test_phaseless_ueg_propagator()
diff --git a/ipie/addons/thermal/qmc/__init__.py b/ipie/addons/thermal/qmc/__init__.py
new file mode 100644
index 00000000..f91ef518
--- /dev/null
+++ b/ipie/addons/thermal/qmc/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
diff --git a/ipie/addons/thermal/qmc/calc.py b/ipie/addons/thermal/qmc/calc.py
new file mode 100644
index 00000000..fc2adf87
--- /dev/null
+++ b/ipie/addons/thermal/qmc/calc.py
@@ -0,0 +1,156 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+"""Helper Routines for setting up a calculation"""
+
+from ipie.addons.thermal.propagation.propagator import Propagator
+from ipie.addons.thermal.qmc.options import ThermalQMCOpts, ThermalQMCParams
+from ipie.addons.thermal.qmc.thermal_afqmc import ThermalAFQMC
+from ipie.addons.thermal.trial.utils import get_trial_density_matrix
+from ipie.addons.thermal.walkers.uhf_walkers import UHFThermalWalkers
+from ipie.config import MPI
+from ipie.hamiltonians.utils import get_hamiltonian
+from ipie.systems.utils import get_system
+from ipie.utils.io import get_input_value
+from ipie.utils.mpi import MPIHandler
+
+
+def get_driver(options: dict, comm: MPI.COMM_WORLD) -> ThermalAFQMC:
+    verbosity = options.get("verbosity", 1)
+    qmc_opts = get_input_value(options, "qmc", default={}, alias=["qmc_options"])
+
+    sys_opts = get_input_value(
+        options, "system", default={}, alias=["model"], verbose=verbosity > 1
+    )
+    ham_opts = get_input_value(options, "hamiltonian", default={}, verbose=verbosity > 1)
+    # Backward compat (to be removed): merge sys_opts into ham_opts, keeping ham_opts' own "name".
+    for item in sys_opts.items():
+        if item[0].lower() == "name" and "name" in ham_opts.keys():
+            continue
+        ham_opts[item[0]] = item[1]
+
+    tdm_opts = get_input_value(
+        options, "trial", default={}, alias=["trial_density_matrix"], verbose=verbosity > 1
+    )
+
+    wlk_opts = get_input_value(
+        options, "walkers", default={}, alias=["walker", "walker_opts"], verbose=verbosity > 1
+    )
+
+    if comm.rank != 0:
+        verbosity = 0  # Non-root ranks pass verbosity 0 to everything built below.
+    lowrank = get_input_value(
+        wlk_opts, "lowrank", default=False, alias=["low_rank"], verbose=verbosity
+    )
+    batched = get_input_value(qmc_opts, "batched", default=False, verbose=verbosity)
+    debug = get_input_value(qmc_opts, "debug", default=False, verbose=verbosity)
+
+    if (lowrank == True) or (batched == True):  # Both options are rejected outright.
+        raise ValueError("Option not supported in thermal code.")
+    else:
+        qmc = ThermalQMCOpts(qmc_opts, verbose=0)
+        mpi_handler = MPIHandler(nmembers=qmc_opts.get("nmembers", 1), verbose=verbosity)
+        system = get_system(
+            sys_opts, verbose=verbosity, comm=comm
+        )  # Have to deal with shared comm in the future. I think we will remove this...
+        ham_file = get_input_value(ham_opts, "integrals", None, verbose=verbosity)
+        if ham_file is None:
+            raise ValueError("Hamiltonian filename not specified.")
+        pack_chol = get_input_value(
+            ham_opts, "symmetry", True, alias=["pack_chol", "pack_cholesky"], verbose=verbosity
+        )
+        hamiltonian = get_hamiltonian(
+            ham_file, mpi_handler.scomm, pack_chol=pack_chol, verbose=verbosity
+        )
+        num_elec = (system.nup, system.ndown)
+        trial = get_trial_density_matrix(
+            hamiltonian,
+            num_elec,
+            qmc.beta,
+            qmc.dt,
+            options=tdm_opts,
+            comm=comm,
+            verbose=verbosity,
+        )
+        stack_size = get_input_value(wlk_opts, "stack_size", default=10, verbose=verbosity)
+        lowrank_thresh = get_input_value(
+            wlk_opts, "lowrank_thresh", default=1e-6, alias=["low_rank_thresh"], verbose=verbosity
+        )
+        walkers = UHFThermalWalkers(
+            trial,
+            hamiltonian.nbasis,
+            qmc.nwalkers,
+            stack_size=stack_size,
+            lowrank=lowrank,
+            lowrank_thresh=lowrank_thresh,
+            verbose=verbosity,
+        )
+
+        if (comm.rank == 0) and (qmc.nsteps > 1):  # NOTE(review): no reset visible here.
+            print("Only num_steps_per_block = 1 allowed in thermal code! Resetting to value of 1.")
+
+        # pylint: disable = no-value-for-parameter
+        params = ThermalQMCParams(
+            mu=qmc.mu,
+            beta=qmc.beta,
+            num_walkers=qmc.nwalkers,
+            total_num_walkers=qmc.nwalkers * comm.size,
+            num_blocks=qmc.nblocks,
+            timestep=qmc.dt,
+            num_stblz=qmc.nstblz,
+            pop_control_freq=qmc.npop_control,
+            pop_control_method=qmc.pop_control_method,
+            rng_seed=qmc.rng_seed,
+        )
+        propagator = Propagator[type(hamiltonian)](params.timestep, params.mu)  # Keyed by class.
+        propagator.build(hamiltonian, trial, walkers, mpi_handler)
+        afqmc = ThermalAFQMC(
+            hamiltonian,
+            trial,
+            walkers,
+            propagator,
+            mpi_handler,
+            params,
+            debug=debug,
+            verbose=verbosity,
+        )
+
+    return afqmc
+
+
+def build_thermal_afqmc_driver(
+    comm,
+    nelec: tuple,
+    hamiltonian_file: str = "hamiltonian.h5",
+    seed: int = None,
+    options: dict = None,
+    verbosity: int = 0,
+):
+    if comm.rank != 0:
+        verbosity = 0
+
+    sys_opts = {"nup": nelec[0], "ndown": nelec[1]}
+    ham_opts = {"integrals": hamiltonian_file}
+    qmc_opts = {"rng_seed": seed}
+
+    options["system"] = sys_opts
+    options["hamiltonian"] = ham_opts
+    options["qmc"].update(qmc_opts)
+    options["verbosity"] = verbosity
+
+    return get_driver(options, comm)
diff --git a/ipie/addons/thermal/qmc/options.py b/ipie/addons/thermal/qmc/options.py
new file mode 100644
index 00000000..cc2a28c2
--- /dev/null
+++ b/ipie/addons/thermal/qmc/options.py
@@ -0,0 +1,123 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+from dataclasses import dataclass
+from typing import ClassVar, Optional
+
+from ipie.utils.io import get_input_value
+from ipie.qmc.options import QMCOpts, QMCParams
+
+
+class ThermalQMCOpts(QMCOpts):
+    r"""QMC input options extended with the thermal parameters `mu` and `beta`.
+
+    Initialised from a dict of options, not all of which are required; `mu` and
+    `beta` are looked up in it (default None) after `QMCOpts` has processed it.
+
+    Parameters
+    ----------
+    inputs : dict
+        Input options.
+    verbose : bool
+        Passed on to `QMCOpts` and to `get_input_value`.
+
+    Attributes
+    ----------
+    batched : bool
+        Whether to do batched calculations.
+    nwalkers : int
+        Number of walkers to propagate in a simulation.
+    dt : float
+        Timestep.
+    nsteps : int
+        Number of steps per block.
+    nblocks : int
+        Number of blocks. Total number of iterations = nblocks * nsteps.
+    nstblz : int
+        Frequency of Gram-Schmidt orthogonalisation steps.
+    npop_control : int
+        Frequency of population control.
+    pop_control_method : str
+        Population control method.
+    eqlb_time : float
+        Time scale of equilibration phase. Only used to fix local
+        energy bound when using phaseless approximation.
+    neqlb : int
+        Number of time steps for the equilibration phase. Only used to fix the
+        local energy bound when using phaseless approximation.
+    rng_seed : int
+        The random number seed.
+    mu : float
+        Chemical potential.
+    beta : float
+        Inverse temperature.
+    """
+
+    # pylint: disable=dangerous-default-value
+    # TODO: Remove this class / replace with dataclass
+    def __init__(self, inputs={}, verbose=False):
+        super().__init__(inputs, verbose)
+
+        def _optional(key):
+            return get_input_value(inputs, key, default=None, verbose=verbose)
+
+        self.mu = _optional("mu")
+        self.beta = _optional("beta")
+
+
+@dataclass
+class ThermalQMCParams(QMCParams):
+    r"""Input options and certain constants / parameters derived from them.
+
+    Attributes
+    ----------
+    mu : float
+        Chemical potential. Required: `__post_init__` raises TypeError if it is None.
+    beta : float
+        Inverse temperature. Required: `__post_init__` raises TypeError if it is None.
+    num_walkers : int
+        Number of walkers **per** core / task / computational unit.
+    total_num_walkers : int
+        The total number of walkers in the simulation.
+    timestep : float
+        The timestep delta_t.
+    num_steps_per_block : int
+        Number of steps of propagation before estimators are evaluated.
+    num_blocks : int
+        Number of blocks. Total number of iterations = num_blocks * num_steps_per_block.
+    num_stblz : int
+        Number of steps before QR stabilization of walkers is performed.
+    pop_control_freq : int
+        Frequency at which population control occurs.
+    rng_seed : int
+        The random number seed. If run in parallel the seeds on other cores /
+        threads are determined from this.
+    """
+
+    # Due to structure of FT algorithm, `num_steps_per_block` is fixed at 1.
+    # Override whatever input for backward compatibility.
+    num_steps_per_block: ClassVar[int] = 1
+    mu: Optional[float] = None  # None only marks "not supplied"; rejected in __post_init__.
+    beta: Optional[float] = None  # None only marks "not supplied"; rejected in __post_init__.
+    pop_control_method: str = "pair_branch"  # Population control method.
+
+    def __post_init__(self):
+        if self.mu is None:
+            raise TypeError("__init__ missing 1 required argument: 'mu'")
+        if self.beta is None:
+            raise TypeError("__init__ missing 1 required argument: 'beta'")
diff --git a/ipie/addons/thermal/qmc/tests/__init__.py b/ipie/addons/thermal/qmc/tests/__init__.py
new file mode 100644
index 00000000..f91ef518
--- /dev/null
+++ b/ipie/addons/thermal/qmc/tests/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
diff --git a/ipie/addons/thermal/qmc/tests/test_afqmc_generic.py b/ipie/addons/thermal/qmc/tests/test_afqmc_generic.py
new file mode 100644
index 00000000..206de2b5
--- /dev/null
+++ b/ipie/addons/thermal/qmc/tests/test_afqmc_generic.py
@@ -0,0 +1,203 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import json
+import tempfile
+import uuid
+from typing import Union
+
+import h5py
+import numpy
+import pytest
+
+try:
+    from ipie.addons.thermal.utils.legacy_testing import build_legacy_driver_generic_test_instance
+
+    _no_cython = False
+
+except ModuleNotFoundError:
+    _no_cython = True
+
+from ipie.addons.thermal.utils.testing import build_driver_generic_test_instance
+from ipie.analysis.extraction import (
+    extract_data,
+    extract_mixed_estimates,
+    extract_observable,
+    extract_test_data_hdf5,
+)
+from ipie.config import MPI
+
+comm = MPI.COMM_WORLD
+# True when running on a single rank.
+serial_test = comm.size == 1
+
+# Unique filename to avoid name collision when running through CI.
+# Only rank 0 generates the id; every other rank holds a None placeholder
+# until the broadcast below hands it rank 0's value.
+test_id = str(uuid.uuid1()) if comm.rank == 0 else None
+
+# Collective call: after it, all ranks share the same `test_id`.
+test_id = comm.bcast(test_id, root=0)
+
+
+@pytest.mark.skipif(_no_cython, reason="Need to build cython modules.")
+@pytest.mark.unit
+def test_thermal_afqmc():
+    """Check thermal AFQMC estimators against the legacy driver for a generic Hamiltonian."""
+    # System params.
+    nup = 5
+    ndown = 5
+    nelec = (nup, ndown)
+    nbasis = 10
+
+    # Thermal AFQMC params.
+    mu = -10.0
+    beta = 0.1
+    timestep = 0.01
+    nwalkers = 32 // comm.size
+    nblocks = 12
+    stabilize_freq = 10
+    pop_control_freq = 1
+    pop_control_method = "pair_branch"  # Alternative: "comb".
+    lowrank = False
+
+    verbose = 0 if (comm.rank != 0) else 1
+    # Local energy evaluation in legacy code seems wrong (presumably for complex integrals).
+    complex_integrals = False
+    debug = True
+    seed = 7
+    numpy.random.seed(seed)
+
+    with tempfile.NamedTemporaryFile() as tmpf1, tempfile.NamedTemporaryFile() as tmpf2:
+        # ---------------------------------------------------------------------
+        # Test.
+        # ---------------------------------------------------------------------
+        afqmc = build_driver_generic_test_instance(
+            nelec,
+            nbasis,
+            mu,
+            beta,
+            timestep,
+            nblocks,
+            nwalkers=nwalkers,
+            lowrank=lowrank,
+            pop_control_method=pop_control_method,
+            stabilize_freq=stabilize_freq,
+            pop_control_freq=pop_control_freq,
+            complex_integrals=complex_integrals,
+            debug=debug,
+            seed=seed,
+            verbose=verbose,
+        )
+        afqmc.run(verbose=verbose, estimator_filename=tmpf1.name)
+        afqmc.finalise()
+        afqmc.estimators.compute_estimators(
+            hamiltonian=afqmc.hamiltonian, trial=afqmc.trial, walker_batch=afqmc.walkers
+        )
+
+        test_energy_data = None
+        test_energy_numer = None
+        test_energy_denom = None
+        test_number_data = None
+
+        if comm.rank == 0:  # Estimator output is only read back on rank 0.
+            test_energy_data = extract_observable(afqmc.estimators.filename, "energy")
+            test_energy_numer = afqmc.estimators["energy"]["ENumer"]
+            test_energy_denom = afqmc.estimators["energy"]["EDenom"]
+            test_number_data = extract_observable(afqmc.estimators.filename, "nav")
+
+        # ---------------------------------------------------------------------
+        # Legacy.
+        # ---------------------------------------------------------------------
+        legacy_afqmc = build_legacy_driver_generic_test_instance(
+            afqmc.hamiltonian,
+            comm,
+            nelec,
+            mu,
+            beta,
+            timestep,
+            nblocks,
+            nwalkers=nwalkers,
+            lowrank=lowrank,
+            stabilize_freq=stabilize_freq,
+            pop_control_freq=pop_control_freq,
+            pop_control_method=pop_control_method,
+            seed=seed,
+            estimator_filename=tmpf2.name,
+            verbose=verbose,
+        )
+        legacy_afqmc.run(comm=comm)
+        legacy_afqmc.finalise(verbose=False)
+        legacy_afqmc.estimators.estimators["mixed"].update(
+            legacy_afqmc.qmc,
+            legacy_afqmc.system,
+            legacy_afqmc.hamiltonian,
+            legacy_afqmc.trial,
+            legacy_afqmc.walk,
+            0,
+            legacy_afqmc.propagators.free_projection,
+        )
+
+        legacy_mixed_data = None
+        enum = None
+        legacy_energy_numer = None
+        legacy_energy_denom = None
+
+        if comm.rank == 0:
+            legacy_mixed_data = extract_mixed_estimates(legacy_afqmc.estimators.filename)
+            enum = legacy_afqmc.estimators.estimators["mixed"].names
+            legacy_energy_numer = legacy_afqmc.estimators.estimators["mixed"].estimates[enum.enumer]
+            legacy_energy_denom = legacy_afqmc.estimators.estimators["mixed"].estimates[enum.edenom]
+
+            # Check the in-memory numerator / denominator first, then the means of the stored data.
+            assert test_energy_numer.real == pytest.approx(legacy_energy_numer.real)
+            assert test_energy_denom.real == pytest.approx(legacy_energy_denom.real)
+            assert test_energy_numer.imag == pytest.approx(legacy_energy_numer.imag)
+            assert test_energy_denom.imag == pytest.approx(legacy_energy_denom.imag)
+
+            assert numpy.mean(test_energy_data.WeightFactor.values[1:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.WeightFactor.values[1:-1].real)
+            )
+            assert numpy.mean(test_energy_data.Weight.values[1:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.Weight.values[1:-1].real)
+            )
+            assert numpy.mean(test_energy_data.ENumer.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.ENumer.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.EDenom.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.EDenom.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.ETotal.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.ETotal.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.E1Body.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.E1Body.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.E2Body.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.E2Body.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.HybridEnergy.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.EHybrid.values[:-1].real)
+            )
+            assert numpy.mean(test_number_data.Nav.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.Nav.values[:-1].real)
+            )
+
+
+if __name__ == "__main__":  # Allow running this test module directly as a script.
+    test_thermal_afqmc()
diff --git a/ipie/addons/thermal/qmc/tests/ueg/test_afqmc_ueg.py b/ipie/addons/thermal/qmc/tests/ueg/test_afqmc_ueg.py
new file mode 100644
index 00000000..c52a8511
--- /dev/null
+++ b/ipie/addons/thermal/qmc/tests/ueg/test_afqmc_ueg.py
@@ -0,0 +1,671 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import json
+import os
+import pprint
+import sys
+import tempfile
+import uuid
+from typing import Union
+
+import h5py
+import numpy
+import pytest
+
+try:
+    from ipie.addons.thermal.utils.legacy_testing import build_legacy_driver_ueg_test_instance
+
+    _no_cython = False
+
+except ModuleNotFoundError:
+    _no_cython = True
+
+from ipie.addons.thermal.utils.testing import build_driver_ueg_test_instance
+from ipie.analysis.extraction import (
+    extract_data,
+    extract_mixed_estimates,
+    extract_observable,
+    extract_test_data_hdf5,
+    get_metadata,
+)
+from ipie.config import MPI
+
+comm = MPI.COMM_WORLD
+# True when running on a single rank.
+serial_test = comm.size == 1
+
+# Unique filename to avoid name collision when running through CI.
+# Only rank 0 generates the id; every other rank holds a None placeholder
+# until the broadcast below hands it rank 0's value.
+test_id = str(uuid.uuid1()) if comm.rank == 0 else None
+
+# Collective call: after it, all ranks share the same `test_id`.
+test_id = comm.bcast(test_id, root=0)
+
+
+def compare_test_data(ref_data, test_data):
+    """Compare reference observables against test observables, key by key.
+
+    "sys_info" is skipped; "EHybrid" may also appear as "HybridEnergy" in
+    `test_data`. Matched keys map to (ref array, test array, max abs diff <
+    1e-10); keys missing from `test_data` are printed and left out.
+    """
+    extra_names = {"EHybrid": ["HybridEnergy"]}
+    comparison = {}
+
+    for key, expected in ref_data.items():
+        if key == "sys_info":
+            continue
+
+        candidates = [key] + extra_names.get(key, [])
+        misses = 0
+        for name in candidates:
+            try:
+                found = test_data[name]
+            except KeyError:
+                misses += 1
+                continue
+
+            ref_arr, test_arr = numpy.array(expected), numpy.array(found)
+            agree = numpy.max(numpy.abs(ref_arr - test_arr)) < 1e-10
+            comparison[key] = (ref_arr, test_arr, agree)
+
+        if misses == len(candidates):
+            print(f"# Issue with test data key {key}")
+
+    return comparison
+
+
+@pytest.mark.skipif(_no_cython, reason="Need to build cython modules.")
+@pytest.mark.unit
+def test_thermal_afqmc_1walker(against_ref=False):
+    """Compare single-walker thermal AFQMC on the UEG with the legacy code.
+
+    With `against_ref=True` the extracted estimators are additionally compared
+    with the stored `reference_1walker.json` data, and mismatches fail the test.
+    """
+    # UEG params.
+    nup = 7
+    ndown = 7
+    nelec = (nup, ndown)
+    rs = 1.0
+    ecut = 1.0
+
+    # Thermal AFQMC params.
+    mu = -1.0
+    beta = 0.1
+    timestep = 0.01
+    nwalkers = 1
+    nblocks = 11
+
+    stabilize_freq = 10
+    pop_control_freq = 1
+    # `pop_control_method` doesn't matter for 1 walker.
+    pop_control_method = "pair_branch"
+    # pop_control_method = "comb"
+    lowrank = False
+
+    verbose = comm.rank == 0
+    debug = True
+    seed = 7
+    numpy.random.seed(seed)
+
+    with tempfile.NamedTemporaryFile() as tmpf1, tempfile.NamedTemporaryFile() as tmpf2:
+        # ---------------------------------------------------------------------
+        # Test.
+        # ---------------------------------------------------------------------
+        afqmc = build_driver_ueg_test_instance(
+            nelec,
+            rs,
+            ecut,
+            mu,
+            beta,
+            timestep,
+            nblocks,
+            nwalkers=nwalkers,
+            lowrank=lowrank,
+            pop_control_method=pop_control_method,
+            stabilize_freq=stabilize_freq,
+            pop_control_freq=pop_control_freq,
+            debug=debug,
+            seed=seed,
+            verbose=verbose,
+        )
+        afqmc.run(verbose=verbose, estimator_filename=tmpf1.name)
+        afqmc.finalise()
+        afqmc.estimators.compute_estimators(
+            hamiltonian=afqmc.hamiltonian, trial=afqmc.trial, walker_batch=afqmc.walkers
+        )
+
+        test_energy_data = None
+        test_energy_numer = None
+        test_energy_denom = None
+        test_number_data = None
+
+        if comm.rank == 0:
+            test_energy_data = extract_observable(afqmc.estimators.filename, "energy")
+            test_energy_numer = afqmc.estimators["energy"]["ENumer"]
+            test_energy_denom = afqmc.estimators["energy"]["EDenom"]
+            test_number_data = extract_observable(afqmc.estimators.filename, "nav")
+
+        # ---------------------------------------------------------------------
+        # Legacy.
+        # ---------------------------------------------------------------------
+        legacy_afqmc = build_legacy_driver_ueg_test_instance(
+            comm,
+            nelec,
+            rs,
+            ecut,
+            mu,
+            beta,
+            timestep,
+            nblocks,
+            nwalkers=nwalkers,
+            lowrank=lowrank,
+            stabilize_freq=stabilize_freq,
+            pop_control_freq=pop_control_freq,
+            pop_control_method=pop_control_method,
+            seed=seed,
+            estimator_filename=tmpf2.name,
+            verbose=verbose,
+        )
+        legacy_afqmc.run(comm=comm)
+        legacy_afqmc.finalise(verbose=False)
+        legacy_afqmc.estimators.estimators["mixed"].update(
+            legacy_afqmc.qmc,
+            legacy_afqmc.system,
+            legacy_afqmc.hamiltonian,
+            legacy_afqmc.trial,
+            legacy_afqmc.walk,
+            0,
+            legacy_afqmc.propagators.free_projection,
+        )
+
+        legacy_mixed_data = None
+        enum = None
+        legacy_energy_numer = None
+        legacy_energy_denom = None
+
+        if comm.rank == 0:
+            legacy_mixed_data = extract_mixed_estimates(legacy_afqmc.estimators.filename)
+            enum = legacy_afqmc.estimators.estimators["mixed"].names
+            legacy_energy_numer = legacy_afqmc.estimators.estimators["mixed"].estimates[enum.enumer]
+            legacy_energy_denom = legacy_afqmc.estimators.estimators["mixed"].estimates[enum.edenom]
+
+            # Check.
+            assert test_energy_numer.real == pytest.approx(legacy_energy_numer.real)
+            assert test_energy_denom.real == pytest.approx(legacy_energy_denom.real)
+            assert test_energy_numer.imag == pytest.approx(legacy_energy_numer.imag)
+            assert test_energy_denom.imag == pytest.approx(legacy_energy_denom.imag)
+
+            assert numpy.mean(test_energy_data.WeightFactor.values[1:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.WeightFactor.values[1:-1].real)
+            )
+            assert numpy.mean(test_energy_data.Weight.values[1:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.Weight.values[1:-1].real)
+            )
+            assert numpy.mean(test_energy_data.ENumer.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.ENumer.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.EDenom.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.EDenom.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.ETotal.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.ETotal.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.E1Body.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.E1Body.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.E2Body.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.E2Body.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.HybridEnergy.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.EHybrid.values[:-1].real)
+            )
+            assert numpy.mean(test_number_data.Nav.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.Nav.values[:-1].real)
+            )
+
+            # ---------------------------------------------------------------------
+            # Test against reference data.
+            if against_ref:
+                _data_dir = (
+                    os.path.abspath(os.path.dirname(__file__)).split("qmc")[0] + "/reference_data/"
+                )
+                _legacy_test_dir = "ueg"
+                _legacy_test = _data_dir + _legacy_test_dir + "/reference_1walker.json"
+
+                test_name = _legacy_test_dir
+                with open(_legacy_test, "r") as f:
+                    ref_data = json.load(f)
+
+                skip_val = ref_data.get("extract_skip_value", 10)
+                _test_energy_data = test_energy_data[::skip_val].to_dict(orient="list")
+                _test_number_data = test_number_data[::skip_val].to_dict(orient="list")
+                energy_comparison = compare_test_data(ref_data, _test_energy_data)
+                number_comparison = compare_test_data(ref_data, _test_number_data)
+
+                print("\nenergy comparison:")
+                pprint.pprint(energy_comparison)
+                print("\nnumber comparison:")
+                pprint.pprint(number_comparison)
+
+                local_err_count = 0
+
+                for k, v in (*energy_comparison.items(), *number_comparison.items()):
+                    if not v[-1]:
+                        local_err_count += 1
+                        print(
+                            f"\n *** FAILED *** : mismatch between benchmark and test run: {test_name}"
+                        )
+                        print(f" name = {k}\n ref = {v[0]}\n test = {v[1]}\n delta = {v[0]-v[1]}\n")
+
+                if local_err_count == 0:
+                    print(f"\n*** PASSED : {test_name} ***\n")
+
+                # Printing alone let a reference mismatch slip through as a passing run.
+                assert local_err_count == 0, f"{local_err_count} mismatch(es) vs {_legacy_test}"
+
+
+@pytest.mark.skipif(_no_cython, reason="Need to build cython modules.")
+@pytest.mark.unit
+def test_thermal_afqmc(against_ref=False):
+    """Compare 32-walker thermal AFQMC on the UEG with the legacy code.
+
+    With `against_ref=True` the extracted estimators are additionally compared
+    with the stored `reference_nompi.json` data, and mismatches fail the test.
+    """
+    # UEG params.
+    nup = 7
+    ndown = 7
+    nelec = (nup, ndown)
+    rs = 1.0
+    ecut = 1.0
+
+    # Thermal AFQMC params.
+    mu = -1.0
+    beta = 0.1
+    timestep = 0.01
+    nwalkers = 32
+    # Steps per block must be fixed at 1 for Thermal AFQMC (legacy code overrides
+    # whatever input), so no such parameter is passed to either driver below.
+    nblocks = 10
+    stabilize_freq = 10
+    pop_control_freq = 1
+    pop_control_method = "pair_branch"
+    # pop_control_method = "comb"
+    lowrank = False
+
+    verbose = comm.rank == 0
+    debug = True
+    seed = 7
+    numpy.random.seed(seed)
+
+    with tempfile.NamedTemporaryFile() as tmpf1, tempfile.NamedTemporaryFile() as tmpf2:
+        # ---------------------------------------------------------------------
+        # Test.
+        # ---------------------------------------------------------------------
+        afqmc = build_driver_ueg_test_instance(
+            nelec,
+            rs,
+            ecut,
+            mu,
+            beta,
+            timestep,
+            nblocks,
+            nwalkers=nwalkers,
+            lowrank=lowrank,
+            pop_control_method=pop_control_method,
+            stabilize_freq=stabilize_freq,
+            pop_control_freq=pop_control_freq,
+            debug=debug,
+            seed=seed,
+            verbose=verbose,
+        )
+        afqmc.run(verbose=verbose, estimator_filename=tmpf1.name)
+        afqmc.finalise()
+        afqmc.estimators.compute_estimators(
+            hamiltonian=afqmc.hamiltonian, trial=afqmc.trial, walker_batch=afqmc.walkers
+        )
+
+        test_energy_data = None
+        test_energy_numer = None
+        test_energy_denom = None
+        test_number_data = None
+
+        if comm.rank == 0:
+            test_energy_data = extract_observable(afqmc.estimators.filename, "energy")
+            test_energy_numer = afqmc.estimators["energy"]["ENumer"]
+            test_energy_denom = afqmc.estimators["energy"]["EDenom"]
+            test_number_data = extract_observable(afqmc.estimators.filename, "nav")
+
+        # ---------------------------------------------------------------------
+        # Legacy.
+        # ---------------------------------------------------------------------
+        legacy_afqmc = build_legacy_driver_ueg_test_instance(
+            comm,
+            nelec,
+            rs,
+            ecut,
+            mu,
+            beta,
+            timestep,
+            nblocks,
+            nwalkers=nwalkers,
+            lowrank=lowrank,
+            stabilize_freq=stabilize_freq,
+            pop_control_freq=pop_control_freq,
+            pop_control_method=pop_control_method,
+            seed=seed,
+            estimator_filename=tmpf2.name,
+            verbose=verbose,
+        )
+        legacy_afqmc.run(comm=comm)
+        legacy_afqmc.finalise(verbose=False)
+        legacy_afqmc.estimators.estimators["mixed"].update(
+            legacy_afqmc.qmc,
+            legacy_afqmc.system,
+            legacy_afqmc.hamiltonian,
+            legacy_afqmc.trial,
+            legacy_afqmc.walk,
+            0,
+            legacy_afqmc.propagators.free_projection,
+        )
+
+        legacy_mixed_data = None
+        enum = None
+        legacy_energy_numer = None
+        legacy_energy_denom = None
+
+        if comm.rank == 0:
+            legacy_mixed_data = extract_mixed_estimates(legacy_afqmc.estimators.filename)
+            enum = legacy_afqmc.estimators.estimators["mixed"].names
+            legacy_energy_numer = legacy_afqmc.estimators.estimators["mixed"].estimates[enum.enumer]
+            legacy_energy_denom = legacy_afqmc.estimators.estimators["mixed"].estimates[enum.edenom]
+
+            # Check.
+            assert test_energy_numer.real == pytest.approx(legacy_energy_numer.real)
+            assert test_energy_denom.real == pytest.approx(legacy_energy_denom.real)
+            assert test_energy_numer.imag == pytest.approx(legacy_energy_numer.imag)
+            assert test_energy_denom.imag == pytest.approx(legacy_energy_denom.imag)
+
+            assert numpy.mean(test_energy_data.WeightFactor.values[1:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.WeightFactor.values[1:-1].real)
+            )
+            assert numpy.mean(test_energy_data.Weight.values[1:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.Weight.values[1:-1].real)
+            )
+            assert numpy.mean(test_energy_data.ENumer.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.ENumer.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.EDenom.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.EDenom.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.ETotal.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.ETotal.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.E1Body.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.E1Body.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.E2Body.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.E2Body.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.HybridEnergy.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.EHybrid.values[:-1].real)
+            )
+            assert numpy.mean(test_number_data.Nav.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.Nav.values[:-1].real)
+            )
+
+            # ---------------------------------------------------------------------
+            # Test against reference data.
+            if against_ref:
+                _data_dir = (
+                    os.path.abspath(os.path.dirname(__file__)).split("qmc")[0] + "/reference_data/"
+                )
+                _legacy_test_dir = "ueg"
+                _legacy_test = _data_dir + _legacy_test_dir + "/reference_nompi.json"
+
+                test_name = _legacy_test_dir
+                with open(_legacy_test, "r") as f:
+                    ref_data = json.load(f)
+
+                skip_val = ref_data.get("extract_skip_value", 10)
+                _test_energy_data = test_energy_data[::skip_val].to_dict(orient="list")
+                _test_number_data = test_number_data[::skip_val].to_dict(orient="list")
+                energy_comparison = compare_test_data(ref_data, _test_energy_data)
+                number_comparison = compare_test_data(ref_data, _test_number_data)
+
+                print("\nenergy comparison:")
+                pprint.pprint(energy_comparison)
+                print("\nnumber comparison:")
+                pprint.pprint(number_comparison)
+
+                local_err_count = 0
+
+                for k, v in (*energy_comparison.items(), *number_comparison.items()):
+                    if not v[-1]:
+                        local_err_count += 1
+                        print(
+                            f"\n *** FAILED *** : mismatch between benchmark and test run: {test_name}"
+                        )
+                        print(f" name = {k}\n ref = {v[0]}\n test = {v[1]}\n delta = {v[0]-v[1]}\n")
+
+                if local_err_count == 0:
+                    print(f"\n*** PASSED : {test_name} ***\n")
+
+                # Printing alone let a reference mismatch slip through as a passing run.
+                assert local_err_count == 0, f"{local_err_count} mismatch(es) vs {_legacy_test}"
+
+
+@pytest.mark.skipif(_no_cython, reason="Need to build cython modules.")
+@pytest.mark.mpi
+def test_thermal_afqmc_mpi(against_ref=False):
+    # UEG params.
+    nup = 7
+    ndown = 7
+    nelec = (nup, ndown)
+    rs = 1.0
+    ecut = 1.0
+
+    # Thermal AFQMC params.
+    mu = -1.0
+    beta = 0.1
+    timestep = 0.01
+    nwalkers = 32 // comm.size
+    # Must be fixed at 1 for Thermal AFQMC--legacy code overrides whatever input!
+    nsteps_per_block = 1
+    nblocks = 10
+    stabilize_freq = 10
+    pop_control_freq = 1
+    pop_control_method = "pair_branch"
+    # pop_control_method = "comb"
+    lowrank = False
+
+    verbose = False if (comm.rank != 0) else True
+    debug = True
+    seed = 7
+    numpy.random.seed(seed)
+
+    with tempfile.NamedTemporaryFile() as tmpf1, tempfile.NamedTemporaryFile() as tmpf2:
+        # ---------------------------------------------------------------------
+        # Test.
+        # ---------------------------------------------------------------------
+        afqmc = build_driver_ueg_test_instance(
+            nelec,
+            rs,
+            ecut,
+            mu,
+            beta,
+            timestep,
+            nblocks,
+            nwalkers=nwalkers,
+            lowrank=lowrank,
+            pop_control_method=pop_control_method,
+            stabilize_freq=stabilize_freq,
+            pop_control_freq=pop_control_freq,
+            debug=debug,
+            seed=seed,
+            verbose=verbose,
+        )
+        afqmc.run(verbose=verbose, estimator_filename=tmpf1.name)
+        afqmc.finalise()
+        afqmc.estimators.compute_estimators(
+            hamiltonian=afqmc.hamiltonian, trial=afqmc.trial, walker_batch=afqmc.walkers
+        )
+
+        test_energy_data = None
+        test_energy_numer = None
+        test_energy_denom = None
+        test_number_data = None
+
+        if comm.rank == 0:
+            test_energy_data = extract_observable(afqmc.estimators.filename, "energy")
+            test_energy_numer = afqmc.estimators["energy"]["ENumer"]
+            test_energy_denom = afqmc.estimators["energy"]["EDenom"]
+            test_number_data = extract_observable(afqmc.estimators.filename, "nav")
+
+        # ---------------------------------------------------------------------
+        # Legacy.
+        # ---------------------------------------------------------------------
+        legacy_afqmc = build_legacy_driver_ueg_test_instance(
+            comm,
+            nelec,
+            rs,
+            ecut,
+            mu,
+            beta,
+            timestep,
+            nblocks,
+            nwalkers=nwalkers,
+            lowrank=lowrank,
+            stabilize_freq=stabilize_freq,
+            pop_control_freq=pop_control_freq,
+            pop_control_method=pop_control_method,
+            seed=seed,
+            estimator_filename=tmpf2.name,
+            verbose=verbose,
+        )
+        legacy_afqmc.run(comm=comm)
+        legacy_afqmc.finalise(verbose=False)
+        legacy_afqmc.estimators.estimators["mixed"].update(
+            legacy_afqmc.qmc,
+            legacy_afqmc.system,
+            legacy_afqmc.hamiltonian,
+            legacy_afqmc.trial,
+            legacy_afqmc.walk,
+            0,
+            legacy_afqmc.propagators.free_projection,
+        )
+
+        legacy_mixed_data = None
+        enum = None
+        legacy_energy_numer = None
+        legacy_energy_denom = None
+
+        if comm.rank == 0:
+            legacy_mixed_data = extract_mixed_estimates(legacy_afqmc.estimators.filename)
+            enum = legacy_afqmc.estimators.estimators["mixed"].names
+            legacy_energy_numer = legacy_afqmc.estimators.estimators["mixed"].estimates[enum.enumer]
+            legacy_energy_denom = legacy_afqmc.estimators.estimators["mixed"].estimates[enum.edenom]
+
+            # Check.
+            assert test_energy_numer.real == pytest.approx(legacy_energy_numer.real)
+            assert test_energy_denom.real == pytest.approx(legacy_energy_denom.real)
+            assert test_energy_numer.imag == pytest.approx(legacy_energy_numer.imag)
+            assert test_energy_denom.imag == pytest.approx(legacy_energy_denom.imag)
+
+            assert numpy.mean(test_energy_data.WeightFactor.values[1:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.WeightFactor.values[1:-1].real)
+            )
+            assert numpy.mean(test_energy_data.Weight.values[1:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.Weight.values[1:-1].real)
+            )
+            assert numpy.mean(test_energy_data.ENumer.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.ENumer.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.EDenom.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.EDenom.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.ETotal.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.ETotal.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.E1Body.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.E1Body.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.E2Body.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.E2Body.values[:-1].real)
+            )
+            assert numpy.mean(test_energy_data.HybridEnergy.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.EHybrid.values[:-1].real)
+            )
+            assert numpy.mean(test_number_data.Nav.values[:-1].real) == pytest.approx(
+                numpy.mean(legacy_mixed_data.Nav.values[:-1].real)
+            )
+
+            # ---------------------------------------------------------------------
+            # Test against reference data.
+            if against_ref:
+                _data_dir = (
+                    os.path.abspath(os.path.dirname(__file__)).split("qmc")[0] + "/reference_data/"
+                )
+                _legacy_test_dir = "ueg"
+                _legacy_test = _data_dir + _legacy_test_dir + "/reference.json"
+
+                test_name = _legacy_test_dir
+                with open(_legacy_test, "r") as f:
+                    ref_data = json.load(f)
+
+                skip_val = ref_data.get("extract_skip_value", 10)
+                _test_energy_data = test_energy_data[::skip_val].to_dict(orient="list")
+                _test_number_data = test_number_data[::skip_val].to_dict(orient="list")
+                energy_comparison = compare_test_data(ref_data, _test_energy_data)
+                number_comparison = compare_test_data(ref_data, _test_number_data)
+
+                print("\nenergy comparison:")
+                pprint.pprint(energy_comparison)
+                print("\nnumber comparison:")
+                pprint.pprint(number_comparison)
+
+                local_err_count = 0
+
+                for k, v in energy_comparison.items():
+                    if not v[-1]:
+                        local_err_count += 1
+                        print(
+                            f"\n *** FAILED *** : mismatch between benchmark and test run: {test_name}"
+                        )
+                        print(f" name = {k}\n ref = {v[0]}\n test = {v[1]}\n delta = {v[0]-v[1]}\n")
+
+                for k, v in number_comparison.items():
+                    if not v[-1]:
+                        local_err_count += 1
+                        print(
+                            f"\n *** FAILED *** : mismatch between benchmark and test run: {test_name}"
+                        )
+                        print(f" name = {k}\n ref = {v[0]}\n test = {v[1]}\n delta = {v[0]-v[1]}\n")
+
+                if local_err_count == 0:
+                    print(f"\n*** PASSED : {test_name} ***\n")
+
+
+if __name__ == "__main__":  # Direct runs also compare against the stored reference data.
+    test_thermal_afqmc_1walker(against_ref=True)
+    test_thermal_afqmc(against_ref=True)
+    # test_thermal_afqmc_mpi(against_ref=True)
diff --git a/ipie/addons/thermal/qmc/thermal_afqmc.py b/ipie/addons/thermal/qmc/thermal_afqmc.py
new file mode 100644
index 00000000..dc682650
--- /dev/null
+++ b/ipie/addons/thermal/qmc/thermal_afqmc.py
@@ -0,0 +1,354 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+"""Driver to perform Thermal AFQMC calculation"""
+import json
+import time
+from typing import Dict, Optional, Tuple
+
+import numpy
+
+from ipie.addons.thermal.estimators.handler import ThermalEstimatorHandler
+from ipie.addons.thermal.propagation.propagator import Propagator
+from ipie.addons.thermal.qmc.options import ThermalQMCParams
+from ipie.addons.thermal.walkers.pop_controller import ThermalPopController
+from ipie.addons.thermal.walkers.uhf_walkers import UHFThermalWalkers
+from ipie.estimators.estimator_base import EstimatorBase
+from ipie.qmc.afqmc import AFQMCBase
+from ipie.utils.backend import arraylib as xp
+from ipie.utils.backend import synchronize
+from ipie.utils.io import to_json
+from ipie.utils.mpi import MPIHandler
+from ipie.walkers.base_walkers import WalkerAccumulator
+
+
+class ThermalAFQMC(AFQMCBase):
+    """Thermal AFQMC driver.
+
+    Parameters
+    ----------
+    hamiltonian, trial :
+        Hamiltonian describing the system and trial density matrix.
+    walkers, propagator :
+        Walkers used for open ended random walk and class describing how to propagate them.
+    mpi_handler :
+        MPI handler whose `comm` is used in `run` and `setup_estimators`.
+    params :
+        Parameters of simulation. See ThermalQMCParams for description.
+    debug : bool
+        Forwarded to `propagator.propagate_walkers` in `run`.
+    verbose : int
+        How much information to print.
+
+    Attributes
+    ----------
+    _parallel_rng_seed : int
+        Seed deduced from params.rng_seed which is generally different on each
+            MPI process.
+    """
+
+    def __init__(
+        self,
+        hamiltonian,
+        trial,
+        walkers,
+        propagator,
+        mpi_handler,
+        params: ThermalQMCParams,
+        debug: bool = False,
+        verbose: int = 0,
+    ):
+        super().__init__(
+            None, hamiltonian, trial, walkers, propagator, mpi_handler, params, verbose
+        )
+        self.debug = debug
+
+        if self.debug and verbose:
+            print("# Using legacy `update_weights`.")
+
+    @staticmethod
+    def build(
+        nelec: Tuple[int, int],
+        mu: float,
+        beta: float,
+        hamiltonian,
+        trial,
+        nwalkers: int = 100,
+        stack_size: int = 10,
+        seed: Optional[int] = None,
+        nblocks: int = 100,
+        timestep: float = 0.005,
+        stabilize_freq: int = 5,
+        pop_control_freq: int = 5,
+        pop_control_method: str = "pair_branch",
+        lowrank: bool = False,
+        lowrank_thresh: float = 1e-6,
+        debug: bool = False,
+        verbose: int = 0,
+        mpi_handler=None,
+    ) -> "ThermalAFQMC":
+        """Factory method to build thermal AFQMC driver from hamiltonian and trial density matrix.
+
+        Parameters
+        ----------
+        nelec : tuple(int, int)
+            Number of alpha and beta electrons. Not referenced in this method's body.
+        mu, beta, timestep : float
+            Chemical potential, inverse temperature and imaginary timestep (default 0.005).
+        hamiltonian, trial :
+            Hamiltonian describing the system and trial density matrix.
+        nwalkers : int
+            Number of walkers per MPI process; the TOTAL is nwalkers * number of processes.
+        stack_size : int
+            Forwarded to UHFThermalWalkers as `stack_size`. Default 10.
+        seed : int, optional
+            Stored as `rng_seed` in ThermalQMCParams.
+        nblocks : int
+            Number of blocks to perform. Default 100.
+        stabilize_freq, pop_control_freq : int
+            Frequencies (in units of steps) at which to perform QR factorization of
+                walkers and population control, respectively. Default 5 for both.
+        pop_control_method : str
+            Population control method. Default "pair_branch".
+        lowrank : bool
+            Low-rank algorithm for thermal propagation. Doesn't work for now!
+        lowrank_thresh : float
+            Threshold for low-rank algorithm. Default 1e-6.
+        debug : bool
+            Forwarded to the ThermalAFQMC constructor.
+        verbose : int
+            Log verbosity. Default 0.
+        mpi_handler : optional
+            MPI handler; a default MPIHandler() is created when None.
+        """
+        if mpi_handler is None:
+            mpi_handler = MPIHandler()
+            comm = mpi_handler.comm
+
+        else:
+            comm = mpi_handler.comm
+
+        # pylint: disable = no-value-for-parameter
+        params = ThermalQMCParams(
+            mu=mu,
+            beta=beta,
+            num_walkers=nwalkers,
+            total_num_walkers=nwalkers * comm.size,
+            num_blocks=nblocks,
+            timestep=timestep,
+            num_stblz=stabilize_freq,
+            pop_control_freq=pop_control_freq,
+            pop_control_method=pop_control_method,
+            rng_seed=seed,
+        )
+
+        walkers = UHFThermalWalkers(
+            trial,
+            hamiltonian.nbasis,
+            nwalkers,
+            stack_size=stack_size,
+            lowrank=lowrank,
+            lowrank_thresh=lowrank_thresh,
+            mpi_handler=mpi_handler,
+            verbose=verbose,
+        )
+        propagator = Propagator[type(hamiltonian)](timestep, mu, lowrank=lowrank, verbose=verbose)
+        propagator.build(
+            hamiltonian, trial=trial, walkers=walkers, mpi_handler=mpi_handler, verbose=verbose
+        )
+        return ThermalAFQMC(
+            hamiltonian,
+            trial,
+            walkers,
+            propagator,
+            mpi_handler,
+            params,
+            debug=debug,
+            verbose=verbose,
+        )
+
+    def run(
+        self,
+        walkers=None,
+        estimator_filename=None,
+        verbose: bool = True,
+        additional_estimators: Optional[Dict[str, EstimatorBase]] = None,
+        print_time_slice: bool = False,
+    ):
+        """Perform Thermal AFQMC simulation using open-ended random walk.
+
+        Parameters
+        ----------
+        walkers : optional
+            Initial distribution of walkers; replaces `self.walkers` when not None.
+        estimator_filename : str
+            File to write estimates to.
+        verbose : bool
+            Not referenced in this method's body.
+        additional_estimators : dict
+            Dictionary of additional estimators to evaluate.
+        print_time_slice : bool
+            If True, compute estimators and call `estimators.print_time_slice` after
+                every time slice.
+        """
+        # Setup.
+        self.setup_timers()
+        ft_setup = time.time()
+        eshift = 0.0
+
+        if walkers is not None:
+            self.walkers = walkers
+
+        self.pcontrol = ThermalPopController(
+            self.params.num_walkers,
+            self.params.num_steps_per_block,
+            self.mpi_handler,
+            self.params.pop_control_method,
+            verbose=self.verbose,
+        )
+
+        self.get_env_info()
+        self.setup_estimators(estimator_filename, additional_estimators=additional_estimators)
+
+        synchronize()
+        comm = self.mpi_handler.comm
+        self.tsetup += time.time() - ft_setup
+
+        # Propagate.
+        total_steps = self.params.num_steps_per_block * self.params.num_blocks
+        # TODO: This magic value of 2 is pretty much never controlled on input.
+        # Moreover I'm not convinced having a two stage shift update actually
+        # matters at all.
+        neqlb_steps = 2.0 / self.params.timestep
+        nslices = numpy.rint(self.params.beta / self.params.timestep).astype(int)
+
+        for step in range(1, total_steps + 1):
+            synchronize()
+            start_step = time.time()
+
+            for t in range(nslices):
+                if self.verbosity >= 2 and comm.rank == 0:
+                    print(" # Timeslice %d of %d." % (t, nslices))
+
+                start = time.time()
+                self.propagator.propagate_walkers(
+                    self.walkers, self.hamiltonian, self.trial, eshift, debug=self.debug
+                )
+
+                self.tprop_fbias = self.propagator.timer.tfbias
+                self.tprop_update = self.propagator.timer.tupdate
+                self.tprop_vhs = self.propagator.timer.tvhs
+                self.tprop_gemm = self.propagator.timer.tgemm
+
+                start_clip = time.time()
+                if t > 0:
+                    wbound = self.pcontrol.total_weight * 0.10  # 10% of the total weight.
+                    xp.clip(
+                        self.walkers.weight, a_min=-wbound, a_max=wbound, out=self.walkers.weight
+                    )  # In-place clipping.
+
+                synchronize()
+                self.tprop_clip += time.time() - start_clip
+
+                start_barrier = time.time()
+                if t % self.params.pop_control_freq == 0:
+                    comm.Barrier()
+
+                self.tprop_barrier += time.time() - start_barrier
+                self.tprop += time.time() - start
+
+                if (t > 0) and (t % self.params.pop_control_freq == 0):
+                    start = time.time()
+                    self.pcontrol.pop_control(self.walkers, comm)
+                    synchronize()
+                    self.tpopc += time.time() - start
+                    self.tpopc_send = self.pcontrol.timer.send_time
+                    self.tpopc_recv = self.pcontrol.timer.recv_time
+                    self.tpopc_comm = self.pcontrol.timer.communication_time
+                    self.tpopc_non_comm = self.pcontrol.timer.non_communication_time
+
+                # Print estimators at each time slice.
+                if print_time_slice:
+                    self.estimators.compute_estimators(
+                        hamiltonian=self.hamiltonian, trial=self.trial, walker_batch=self.walkers
+                    )
+                    self.estimators.print_time_slice(comm, t, self.accumulators)
+
+            # Accumulate weight, hybrid energy etc. across block.
+            start = time.time()
+            self.accumulators.update(self.walkers)
+            self.testim += time.time() - start
+
+            # Calculate estimators.
+            start = time.time()
+            if step % self.params.num_steps_per_block == 0:
+                self.estimators.compute_estimators(
+                    hamiltonian=self.hamiltonian, trial=self.trial, walker_batch=self.walkers
+                )
+
+                self.estimators.print_block(
+                    comm, step // self.params.num_steps_per_block, self.accumulators
+                )
+                self.accumulators.zero()
+
+            synchronize()
+            self.testim += time.time() - start
+
+            if step < neqlb_steps:
+                eshift = self.accumulators.eshift
+
+            else:
+                eshift += self.accumulators.eshift - eshift  # Algebraically accumulators.eshift.
+
+            self.walkers.reset(self.trial)  # Reset stack, weights, phase.
+
+            synchronize()
+            self.tstep += time.time() - start_step
+
+    def setup_estimators(
+        self, filename, additional_estimators: Optional[Dict[str, EstimatorBase]] = None
+    ):
+        self.accumulators = WalkerAccumulator(
+            ["Weight", "WeightFactor", "HybridEnergy"], self.params.num_steps_per_block
+        )
+        comm = self.mpi_handler.comm
+        self.estimators = ThermalEstimatorHandler(
+            self.mpi_handler.comm,
+            self.hamiltonian,
+            self.trial,
+            walker_state=self.accumulators,
+            verbose=(comm.rank == 0 and self.verbose),  # Only rank 0 may be verbose.
+            filename=filename,
+        )
+
+        if additional_estimators is not None:
+            for k, v in additional_estimators.items():
+                self.estimators[k] = v
+
+        # TODO: Move this to estimator and log uuid etc in serialization
+        json.encoder.FLOAT_REPR = lambda o: format(o, ".6f")
+        json_string = to_json(self)
+        self.estimators.json_string = json_string
+        self.estimators.initialize(comm)
+
+        # Calculate estimates for initial distribution of walkers.
+        self.estimators.compute_estimators(
+            hamiltonian=self.hamiltonian, trial=self.trial, walker_batch=self.walkers
+        )
+        self.accumulators.update(self.walkers)
+        self.estimators.print_block(comm, 0, self.accumulators)
+        self.accumulators.zero()
diff --git a/ipie/addons/thermal/reference_data/generic/generic_integrals.h5 b/ipie/addons/thermal/reference_data/generic/generic_integrals.h5
new file mode 100644
index 00000000..cd0fa383
Binary files /dev/null and b/ipie/addons/thermal/reference_data/generic/generic_integrals.h5 differ
diff --git a/ipie/addons/thermal/reference_data/generic/generic_ref.json b/ipie/addons/thermal/reference_data/generic/generic_ref.json
new file mode 100644
index 00000000..d042dba9
--- /dev/null
+++ b/ipie/addons/thermal/reference_data/generic/generic_ref.json
@@ -0,0 +1 @@
+{"WeightFactor": [1.0, 1.0], "Weight": [1.0, 0.00031475080684714826], "ENumer": [0.12706480956961563, 0.00057748930951224], "EDenom": [1.0, 0.00031475080684714826], "ETotal": [0.12706480956961563, 1.834750847176334], "E1Body": [-12.035567865348977, -1.6611431481738972], "E2Body": [12.162632674918592, 3.495893995350231], "EHybrid": [0.0, 0.0], "Overlap": [1.0, 1.0], "Nav": [9.864020634242591, 5.424380018458521]}
\ No newline at end of file
diff --git a/ipie/addons/thermal/reference_data/ueg/input.json b/ipie/addons/thermal/reference_data/ueg/input.json
new file mode 100644
index 00000000..defb4831
--- /dev/null
+++ b/ipie/addons/thermal/reference_data/ueg/input.json
@@ -0,0 +1,32 @@
+{
+    "system": {
+        "name": "UEG",
+        "nup": 7,
+        "ndown": 7,
+        "rs": 1.0,
+        "mu": -1.0,
+        "ecut": 1.0
+    },
+    "qmc": {
+        "dt": 0.01,
+        "nwalkers": 32,
+        "blocks": 10,
+        "nsteps": 1,
+        "beta": 0.1,
+        "rng_seed": 7,
+        "pop_control_freq": 1,
+        "stabilise_freq": 10,
+        "batched": false
+    },
+    "trial": {
+        "name": "one_body"
+    },
+    "walkers": {
+        "population_control": "pair_branch"
+    },
+    "estimators": {
+        "mixed": {
+            "one_rdm": true
+        }
+    }
+}
diff --git a/ipie/addons/thermal/reference_data/ueg/input_1walker.json b/ipie/addons/thermal/reference_data/ueg/input_1walker.json
new file mode 100644
index 00000000..425e79ed
--- /dev/null
+++ b/ipie/addons/thermal/reference_data/ueg/input_1walker.json
@@ -0,0 +1,32 @@
+{
+    "system": {
+        "name": "UEG",
+        "nup": 7,
+        "ndown": 7,
+        "rs": 1.0,
+        "mu": -1.0,
+        "ecut": 1.0
+    },
+    "qmc": {
+        "dt": 0.01,
+        "nwalkers": 1,
+        "blocks": 10,
+        "nsteps": 1,
+        "beta": 0.1,
+        "rng_seed": 7,
+        "pop_control_freq": 1,
+        "stabilise_freq": 10,
+        "batched": false
+    },
+    "trial": {
+        "name": "one_body"
+    },
+    "walkers": {
+        "population_control": "pair_branch"
+    },
+    "estimators": {
+        "mixed": {
+            "one_rdm": true
+        }
+    }
+}
diff --git a/ipie/addons/thermal/reference_data/ueg/input_nompi.json b/ipie/addons/thermal/reference_data/ueg/input_nompi.json
new file mode 100644
index 00000000..defb4831
--- /dev/null
+++ b/ipie/addons/thermal/reference_data/ueg/input_nompi.json
@@ -0,0 +1,32 @@
+{
+    "system": {
+        "name": "UEG",
+        "nup": 7,
+        "ndown": 7,
+        "rs": 1.0,
+        "mu": -1.0,
+        "ecut": 1.0
+    },
+    "qmc": {
+        "dt": 0.01,
+        "nwalkers": 32,
+        "blocks": 10,
+        "nsteps": 1,
+        "beta": 0.1,
+        "rng_seed": 7,
+        "pop_control_freq": 1,
+        "stabilise_freq": 10,
+        "batched": false
+    },
+    "trial": {
+        "name": "one_body"
+    },
+    "walkers": {
+        "population_control": "pair_branch"
+    },
+    "estimators": {
+        "mixed": {
+            "one_rdm": true
+        }
+    }
+}
diff --git a/ipie/addons/thermal/reference_data/ueg/reference.json b/ipie/addons/thermal/reference_data/ueg/reference.json
new file mode 100644
index 00000000..d1cfd38b
--- /dev/null
+++ b/ipie/addons/thermal/reference_data/ueg/reference.json
@@ -0,0 +1 @@
+{"WeightFactor": [32.0, 47.947639102045635], "Weight": [32.0, 31.999999999999993], "ENumer": [853.4128425513718, 986.7978362646822], "EDenom": [32.0, 31.999999999999993], "ETotal": [26.66915132973037, 30.837432383271327], "E1Body": [28.374994808285745, 33.217171356971804], "E2Body": [-1.705843478555375, -2.379738973700476], "EHybrid": [0.0, 0.0], "Overlap": [1.0, 1.0], "Nav": [14.000000381209672, 16.37587194751124]}
\ No newline at end of file
diff --git a/ipie/addons/thermal/reference_data/ueg/reference_1walker.json b/ipie/addons/thermal/reference_data/ueg/reference_1walker.json
new file mode 100644
index 00000000..46a6ca8f
--- /dev/null
+++ b/ipie/addons/thermal/reference_data/ueg/reference_1walker.json
@@ -0,0 +1 @@
+{"WeightFactor": [1.0, 1.0], "Weight": [1.0, 0.1], "ENumer": [26.669151329730372, 3.0880611634447632], "EDenom": [1.0, 0.1], "ETotal": [26.669151329730372, 30.880611634447632], "E1Body": [28.374994808285745, 33.2449965368494], "E2Body": [-1.7058434785553742, -2.364384902401771], "EHybrid": [0.0, 0.0], "Overlap": [1.0, 1.0], "Nav": [14.000000381209672, 16.388846234869657], "sys_info": {"nranks": 1, "branch": "ft_cleanup", "sha1": "3e25fb5948014be236679907b176cb8087863e3e-dirty", "numpy": {"version": "1.24.4", "path": "/Users/jiang/opt/anaconda3/envs/ipie_ftclean/lib/python3.8/site-packages/numpy", "BLAS": {"lib": "openblas64_ openblas64_", "path": "/usr/local/lib"}}, "scipy": {"version": "1.10.1", "path": "/Users/jiang/opt/anaconda3/envs/ipie_ftclean/lib/python3.8/site-packages/scipy"}, "h5py": {"version": "3.9.0", "path": "/Users/jiang/opt/anaconda3/envs/ipie_ftclean/lib/python3.8/site-packages/h5py"}}}
\ No newline at end of file
diff --git a/ipie/addons/thermal/reference_data/ueg/reference_nompi.json b/ipie/addons/thermal/reference_data/ueg/reference_nompi.json
new file mode 100644
index 00000000..d7a15e47
--- /dev/null
+++ b/ipie/addons/thermal/reference_data/ueg/reference_nompi.json
@@ -0,0 +1 @@
+{"WeightFactor": [32.0, 47.78867518157943], "Weight": [32.0, 32.00000000000001], "ENumer": [853.4128425513711, 986.9845115919738], "EDenom": [32.0, 32.00000000000001], "ETotal": [26.669151329730347, 30.843265987249175], "E1Body": [28.374994808285724, 33.2211214029656], "E2Body": [-1.7058434785553744, -2.3778554157164185], "EHybrid": [0.0, 0.0], "Overlap": [1.0, 1.0], "Nav": [14.000000381209661, 16.3778298504611]}
\ No newline at end of file
diff --git a/ipie/addons/thermal/trial/__init__.py b/ipie/addons/thermal/trial/__init__.py
new file mode 100644
index 00000000..f91ef518
--- /dev/null
+++ b/ipie/addons/thermal/trial/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
diff --git a/ipie/addons/thermal/trial/chem_pot.py b/ipie/addons/thermal/trial/chem_pot.py
new file mode 100644
index 00000000..d54c87c5
--- /dev/null
+++ b/ipie/addons/thermal/trial/chem_pot.py
@@ -0,0 +1,86 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+
+from ipie.addons.thermal.estimators.particle_number import particle_number
+from ipie.addons.thermal.estimators.thermal import one_rdm_stable
+from ipie.utils.io import format_fixed_width_floats, format_fixed_width_strings
+
+
+def find_chemical_potential(
+    alt_convention, rho, beta, num_bins, target, deps=1e-6, max_it=1000, verbose=False
+):
+    """Bracket, then bisect, for the chemical potential mu at which <N> matches target."""
+    # TODO: some sort of generic starting point independent of
+    # system/temperature
+    dmu1 = dmu2 = 1
+    mu1 = -1
+    mu2 = 1
+    sign = -1 if alt_convention else 1
+    if verbose:
+        print(f"# Finding chemical potential to match <N> = {target:13.8e}")
+    while numpy.sign(dmu1) * numpy.sign(dmu2) > 0:  # Until the deviations differ in sign.
+        rho1 = compute_rho(rho, mu1, beta, sign=sign)
+        dmat = one_rdm_stable(rho1, num_bins)
+        dmu1 = delta_nav(dmat, target)
+        rho2 = compute_rho(rho, mu2, beta, sign=sign)
+        dmat = one_rdm_stable(rho2, num_bins)
+        dmu2 = delta_nav(dmat, target)
+        if numpy.sign(dmu1) * numpy.sign(dmu2) < 0:
+            if verbose:
+                print(f"# Chemical potential lies within range of [{mu1:f},{mu2:f}]")
+                print(f"# delta_mu1 = {dmu1.real:f}, delta_mu2 = {dmu2.real:f}")
+            break
+        else:
+            mu1 -= 2  # Widen the search interval by 2 on each side.
+            mu2 += 2
+            if verbose:
+                print(f"# Increasing chemical potential search to [{mu1:f},{mu2:f}]")
+    found_mu = False
+    if verbose:
+        print("# " + format_fixed_width_strings(["iteration", "mu", "Dmu", "<N>"]))
+    for i in range(0, max_it):
+        mu = 0.5 * (mu1 + mu2)  # Midpoint of the current interval.
+        rho_mu = compute_rho(rho, mu, beta, sign=sign)
+        dmat = one_rdm_stable(rho_mu, num_bins)
+        dmu = delta_nav(dmat, target).real
+        if verbose:
+            out = [i, mu, dmu, particle_number(dmat).real]
+            print("# " + format_fixed_width_floats(out))
+        if abs(dmu) < deps:  # Converged: deviation from target is below deps.
+            found_mu = True
+            break
+        else:
+            if dmu * dmu1 > 0:  # Same sign as at mu1: move the mu1 end to the midpoint.
+                mu1 = mu
+            elif dmu * dmu2 > 0:  # Same sign as at mu2: move the mu2 end to the midpoint.
+                mu2 = mu
+    if found_mu:
+        return mu
+    else:
+        print("# Error chemical potential not found")
+        return None  # No midpoint met the tolerance within max_it iterations.
+
+
+def delta_nav(dm, nav):
+    return particle_number(dm) - nav  # Signed deviation of particle_number(dm) from nav.
+
+
+def compute_rho(rho, mu, beta, sign=1):  # Scale a 3-index rho by exp(sign * beta * mu).
+    return numpy.einsum("ijk,k->ijk", rho, numpy.exp(sign * beta * mu * numpy.ones(rho.shape[-1])))
diff --git a/ipie/addons/thermal/trial/mean_field.py b/ipie/addons/thermal/trial/mean_field.py
new file mode 100644
index 00000000..d205f536
--- /dev/null
+++ b/ipie/addons/thermal/trial/mean_field.py
@@ -0,0 +1,149 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+import scipy.linalg
+
+from ipie.addons.thermal.estimators.generic import fock_generic
+from ipie.addons.thermal.estimators.greens_function import greens_function
+from ipie.addons.thermal.estimators.particle_number import particle_number
+from ipie.addons.thermal.estimators.thermal import one_rdm_stable
+from ipie.addons.thermal.trial.chem_pot import compute_rho, find_chemical_potential
+from ipie.addons.thermal.trial.one_body import OneBody
+
+
+class MeanField(OneBody):
+    """Thermal Hartree-Fock trial density matrix layered on top of `OneBody`.
+
+    The constructor first runs the `OneBody` initialisation, then refines the
+    density matrix `P` and the chemical potential `mu` via
+    `thermal_hartree_fock`, and finally rebuilds `dmat`, `dmat_inv`, `G` and
+    `nav` from the resulting mean-field Hamiltonian.
+
+    Options read here (on top of those consumed by `OneBody`): "alpha"
+    (default 0.75), "max_scf_it" and "max_macro_it" (both default to
+    `self.max_it`) and "find_mu" (default True).
+    """
+
+    def __init__(
+        self,
+        hamiltonian,
+        nelec,
+        beta,
+        dt,
+        options=None,
+        alt_convention=False,
+        H1=None,
+        verbose=False,
+    ):
+        """Build the one-body trial, then replace it by the mean-field one.
+
+        All arguments are forwarded unchanged to `OneBody.__init__`; `options`
+        additionally supplies the mean-field settings listed on the class.
+        """
+        if options is None:
+            options = {}
+
+        super().__init__(
+            hamiltonian,
+            nelec,
+            beta,
+            dt,
+            options=options,
+            alt_convention=alt_convention,
+            H1=H1,
+            verbose=verbose,
+        )
+        if verbose:
+            print("# Building THF density matrix.")
+
+        # Weight given to the previous density matrix when mixing in `scf`.
+        self.alpha = options.get("alpha", 0.75)
+        self.max_scf_it = options.get("max_scf_it", self.max_it)
+        self.max_macro_it = options.get("max_macro_it", self.max_it)
+        self.find_mu = options.get("find_mu", True)
+        self.P, HMF, self.mu = self.thermal_hartree_fock(hamiltonian, beta)
+        # mu times the identity, so that HMF - muN shifts the diagonal by mu.
+        muN = self.mu * numpy.eye(hamiltonian.nbasis, dtype=self.G.dtype)
+        # Single-time-step (dt) propagator for each of the two spin blocks.
+        self.dmat = numpy.array(
+            [scipy.linalg.expm(-dt * (HMF[0] - muN)), scipy.linalg.expm(-dt * (HMF[1] - muN))]
+        )
+        self.dmat_inv = numpy.array(
+            [
+                scipy.linalg.inv(self.dmat[0], check_finite=False),
+                scipy.linalg.inv(self.dmat[1], check_finite=False),
+            ]
+        )
+        self.G = numpy.array([greens_function(self.dmat[0]), greens_function(self.dmat[1])])
+        self.nav = particle_number(self.P).real
+
+    def thermal_hartree_fock(self, hamiltonian, beta):
+        """Alternate SCF solves and chemical-potential updates until mu settles.
+
+        Starts from `self.mu` and a copy of `self.P`. Each macro iteration runs
+        `scf`, exponentiates the returned mean-field Hamiltonian over `self.dtau`,
+        optionally re-determines mu with `find_chemical_potential`, and rebuilds
+        `P`. The loop stops once `abs(mu - mu_old) < self.deps` or after
+        `self.max_macro_it` iterations.
+
+        Returns
+        -------
+        tuple
+            `(P, HMF, mu)` from the last macro iteration.
+        """
+        # NOTE(review): if max_macro_it is 0 the loop body never runs and the
+        # return statement references unassigned `HMF`/`mu` -- confirm callers
+        # never request that.
+        dt = self.dtau
+        mu_old = self.mu
+        P = self.P.copy()
+
+        if self.verbose:
+            print("# Determining Thermal Hartree-Fock Density Matrix.")
+
+        for it in range(self.max_macro_it):
+            if self.verbose:
+                print(f"\n# Macro iteration: {it}")
+
+            HMF = self.scf(hamiltonian, beta, mu_old, P)
+            rho = numpy.array([scipy.linalg.expm(-dt * HMF[0]), scipy.linalg.expm(-dt * HMF[1])])
+            if self.find_mu:
+                mu = find_chemical_potential(
+                    self.alt_convention,
+                    rho,
+                    dt,
+                    self.nstack,
+                    self.nav,
+                    deps=self.deps,
+                    max_it=self.max_it,
+                    verbose=self.verbose,
+                )
+
+            else:
+                mu = self.mu
+
+            # NOTE(review): P is rebuilt with `mu_old`, not the freshly found
+            # `mu`, and `sign` is left at its default even when
+            # `alt_convention` is set -- confirm both are intended.
+            rho_mu = compute_rho(rho, mu_old, dt)
+            P = one_rdm_stable(rho_mu, self.nstack)
+            dmu = abs(mu - mu_old)
+
+            if self.verbose:
+                print(f"# New mu: {mu:13.8e} Old mu: {mu_old:13.8e} Dmu: {dmu:13.8e}")
+
+            if dmu < self.deps:
+                break
+
+            mu_old = mu
+
+        return P, HMF, mu
+
+    def scf(self, hamiltonian, beta, mu, P):
+        """Iterate the mean-field Hamiltonian at fixed chemical potential `mu`.
+
+        A first Fock matrix is built from `P`; afterwards the density matrix is
+        repeatedly rebuilt from `expm(-dtau * (HMF - mu * I))` and mixed with
+        the previous one using weight `self.alpha`, until the norm of the
+        change drops below `self.deps` or `self.max_scf_it` is reached.
+
+        Returns
+        -------
+        The mean-field Hamiltonian `HMF` from the last `fock_generic` call.
+        `beta` is accepted but not used in the body.
+        """
+        # Compute HMF
+        HMF = fock_generic(hamiltonian, P)
+        dt = self.dtau
+        muN = mu * numpy.eye(hamiltonian.nbasis, dtype=self.G.dtype)
+        rho = numpy.array(
+            [scipy.linalg.expm(-dt * (HMF[0] - muN)), scipy.linalg.expm(-dt * (HMF[1] - muN))]
+        )
+        Pold = one_rdm_stable(rho, self.nstack)
+
+        if self.verbose:
+            print("# Running Thermal SCF.")
+
+        for _ in range(self.max_scf_it):
+            HMF = fock_generic(hamiltonian, Pold)
+            rho = numpy.array(
+                [scipy.linalg.expm(-dt * (HMF[0] - muN)), scipy.linalg.expm(-dt * (HMF[1] - muN))]
+            )
+            # Linear mixing: alpha weights the old density, (1 - alpha) the new.
+            Pnew = (1 - self.alpha) * one_rdm_stable(rho, self.nstack) + self.alpha * Pold
+            change = numpy.linalg.norm(Pnew - Pold)
+
+            if change < self.deps:
+                break
+
+            Pold = Pnew.copy()
+
+        if self.verbose:
+            # NOTE(review): this reports the particle number of the input `P`,
+            # not of the density matrix produced by the loop above.
+            N = particle_number(P).real
+            print(f"# Average particle number: {N:13.8e}")
+
+        return HMF
diff --git a/ipie/addons/thermal/trial/one_body.py b/ipie/addons/thermal/trial/one_body.py
new file mode 100644
index 00000000..50d15b44
--- /dev/null
+++ b/ipie/addons/thermal/trial/one_body.py
@@ -0,0 +1,164 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+import scipy.linalg
+
+from ipie.addons.thermal.estimators.greens_function import greens_function
+from ipie.addons.thermal.estimators.particle_number import particle_number
+from ipie.addons.thermal.estimators.thermal import one_rdm_stable
+from ipie.addons.thermal.trial.chem_pot import compute_rho, find_chemical_potential
+from ipie.utils.misc import update_stack
+
+
+class OneBody:
+    def __init__(
+        self,
+        hamiltonian,
+        nelec,
+        beta,
+        dt,
+        options=None,
+        alt_convention=False,
+        H1=None,
+        verbose=False,
+    ):
+        if options is None:
+            options = {}
+
+        self.name = "thermal"
+        self.compute_trial_energy = False
+        self.verbose = verbose
+        self.alt_convention = alt_convention
+
+        if H1 is None:
+            try:
+                self.H1 = hamiltonian.H1
+
+            except AttributeError:
+                self.H1 = hamiltonian.h1e
+
+        else:
+            self.H1 = H1
+
+        if verbose:
+            print("# Building OneBody density matrix.")
+            print(f"# beta in OneBody: {beta}")
+            print(f"# dt in OneBody: {dt}")
+
+        dmat_up = scipy.linalg.expm(-dt * (self.H1[0]))
+        dmat_down = scipy.linalg.expm(-dt * (self.H1[1]))
+        self.dmat = numpy.array([dmat_up, dmat_down])
+        cond = numpy.linalg.cond(self.dmat[0])
+
+        if verbose:
+            print(f"# condition number of BT: {cond: 10e}")
+
+        self.nelec = nelec
+        self.nav = options.get("nav", None)
+
+        if self.nav is None:
+            self.nav = numpy.sum(self.nelec)
+
+        if verbose:
+            print(f"# Target average electron number: {self.nav}")
+
+        self.max_it = options.get("max_it", 1000)
+        self.deps = options.get("threshold", 1e-6)
+        self.mu = options.get("mu", None)
+
+        self.nslice = int(beta / dt)
+        self.stack_size = options.get("stack_size", None)
+
+        if self.stack_size == None:
+            if verbose:
+                print("# Estimating stack size from BT.")
+
+            self.cond = numpy.linalg.cond(self.dmat[0])
+            # We will end up multiplying many BTs together. Can roughly determine
+            # safe stack size from condition number of BT as the condition number of
+            # the product will scale roughly as cond(BT)^(number of products).
+            # We can determine a conservative stack size by requiring that the
+            # condition number of the product does not exceed 1e3.
+            self.stack_size = min(self.nslice, int(3.0 / numpy.log10(self.cond)))
+
+            if verbose:
+                print(f"# Initial stack size, # of slices: {self.stack_size}, {self.nslice}")
+
+        # Adjust stack size
+        self.stack_size = update_stack(self.stack_size, self.nslice, verbose=verbose)
+        self.nstack = int(beta / (self.stack_size * dt))
+
+        if verbose:
+            print(f"# Number of stacks: {self.nstack}")
+
+        sign = 1
+        if self.alt_convention:
+            if verbose:
+                print("# Using alternate sign convention for chemical potential.")
+
+            sign = -1
+
+        self.dtau = self.stack_size * dt
+
+        if self.mu is None:
+            self.rho = numpy.array(
+                [
+                    scipy.linalg.expm(-self.dtau * (self.H1[0])),
+                    scipy.linalg.expm(-self.dtau * (self.H1[1])),
+                ]
+            )
+            self.mu = find_chemical_potential(
+                self.alt_convention,
+                self.rho,
+                self.dtau,
+                self.nstack,
+                self.nav,
+                deps=self.deps,
+                max_it=self.max_it,
+                verbose=verbose,
+            )
+
+        else:
+            self.rho = numpy.array(
+                [
+                    scipy.linalg.expm(-self.dtau * (self.H1[0])),
+                    scipy.linalg.expm(-self.dtau * (self.H1[1])),
+                ]
+            )
+
+        if self.verbose:
+            print(f"# Chemical potential in trial density matrix: {self.mu: .10e}")
+
+        self.P = one_rdm_stable(compute_rho(self.rho, self.mu, self.dtau, sign=sign), self.nstack)
+        self.nav = particle_number(self.P).real
+
+        if self.verbose:
+            print(f"# Average particle number in trial density matrix: {self.nav}")
+
+        self.dmat = compute_rho(self.dmat, self.mu, dt, sign=sign)
+        self.dmat_inv = numpy.array(
+            [
+                scipy.linalg.inv(self.dmat[0], check_finite=False),
+                scipy.linalg.inv(self.dmat[1], check_finite=False),
+            ]
+        )
+
+        self.G = numpy.array([greens_function(self.dmat[0]), greens_function(self.dmat[1])])
+        self.error = False
+        self.init = numpy.array([0])
diff --git a/ipie/addons/thermal/trial/tests/__init__.py b/ipie/addons/thermal/trial/tests/__init__.py
new file mode 100644
index 00000000..f91ef518
--- /dev/null
+++ b/ipie/addons/thermal/trial/tests/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
diff --git a/ipie/addons/thermal/trial/tests/test_chem_pot.py b/ipie/addons/thermal/trial/tests/test_chem_pot.py
new file mode 100644
index 00000000..b6b74e7b
--- /dev/null
+++ b/ipie/addons/thermal/trial/tests/test_chem_pot.py
@@ -0,0 +1,50 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+import scipy.linalg
+import pytest
+
+from ipie.addons.thermal.trial.chem_pot import find_chemical_potential
+from ipie.legacy.trial_density_matrices.chem_pot import (
+    find_chemical_potential as legacy_find_chemical_potential,
+)
+
+
+@pytest.mark.unit
+def test_find_chemical_potential():
+    """Check the thermal-addon chemical potential search against the legacy one.
+
+    Both implementations receive the same randomly generated `rho` and the same
+    remaining arguments, and the returned chemical potentials must agree.
+    """
+    dt = 0.01
+    beta = 1
+    stack_size = 3
+    nstack = 20
+    nav = 7
+    nbsf = 14
+    alt_convention = False
+
+    dtau = dt * stack_size
+    # The random matrix is unseeded; the comparison is still consistent because
+    # the identical `rho` is passed to both implementations.
+    h1e = numpy.random.random((nbsf, nbsf))
+    rho = numpy.array([scipy.linalg.expm(-dtau * h1e), scipy.linalg.expm(-dtau * h1e)])
+
+    # NOTE(review): `dt` (not `dtau`) is passed as the third argument to both
+    # calls -- confirm this is intended.
+    mu = find_chemical_potential(alt_convention, rho, dt, nstack, nav)
+    legacy_mu = legacy_find_chemical_potential(alt_convention, rho, dt, nstack, nav)
+
+    numpy.testing.assert_allclose(mu, legacy_mu)
+
+
+# Allow running this test module directly as a script, without pytest.
+if __name__ == "__main__":
+    test_find_chemical_potential()
diff --git a/ipie/addons/thermal/trial/tests/test_mean_field.py b/ipie/addons/thermal/trial/tests/test_mean_field.py
new file mode 100644
index 00000000..57971904
--- /dev/null
+++ b/ipie/addons/thermal/trial/tests/test_mean_field.py
@@ -0,0 +1,101 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+import pytest
+
+try:
+    from ipie.legacy.trial_density_matrices.mean_field import MeanField as LegacyMeanField
+
+    _no_cython = False
+
+except ModuleNotFoundError:
+    _no_cython = True
+
+from ipie.systems.generic import Generic
+from ipie.utils.testing import generate_hamiltonian
+from ipie.hamiltonians.generic import Generic as HamGeneric
+from ipie.addons.thermal.trial.mean_field import MeanField
+from ipie.legacy.hamiltonians._generic import Generic as LegacyHamGeneric
+
+
+@pytest.mark.skipif(_no_cython, reason="Need to build cython modules.")
+@pytest.mark.unit
+def test_mean_field():
+    """Compare the thermal-addon `MeanField` trial with the legacy class.
+
+    A random generic Hamiltonian is generated, both trials are built from it,
+    and `mu`, `nav`, `P`, `G`, `dmat` and `dmat_inv` are required to agree.
+    Skipped when the legacy `MeanField` import failed.
+    """
+    nup = 5
+    ndown = 5
+    nelec = (nup, ndown)
+    nbasis = 10
+
+    mu = -10.0
+    beta = 0.1
+    timestep = 0.01
+
+    alt_convention = False
+    sparse = False
+    complex_integrals = True
+    verbose = True
+
+    sym = 8
+    if complex_integrals:
+        sym = 4
+
+    # Test.
+    system = Generic(nelec)
+    h1e, chol, _, eri = generate_hamiltonian(
+        nbasis, nelec, cplx=complex_integrals, sym=sym, tol=1e-10
+    )
+    # The same one-body matrix is used for both spin blocks.
+    hamiltonian = HamGeneric(
+        h1e=numpy.array([h1e, h1e]), chol=chol.reshape((-1, nbasis**2)).T.copy(), ecore=0
+    )
+    trial = MeanField(hamiltonian, nelec, beta, timestep, verbose=verbose)
+
+    # Legacy.
+    legacy_system = Generic(nelec, verbose=verbose)
+    legacy_system.mu = mu
+    legacy_hamiltonian = LegacyHamGeneric(
+        h1e=hamiltonian.H1, chol=hamiltonian.chol, ecore=hamiltonian.ecore, verbose=verbose
+    )
+    # The legacy Hamiltonian gets the Cholesky vectors reshaped to
+    # (nchol, nbasis, nbasis) as its `hs_pot`.
+    legacy_hamiltonian.hs_pot = numpy.copy(hamiltonian.chol)
+    legacy_hamiltonian.hs_pot = legacy_hamiltonian.hs_pot.T.reshape(
+        (hamiltonian.nchol, hamiltonian.nbasis, hamiltonian.nbasis)
+    )
+    legacy_hamiltonian.mu = mu
+    legacy_hamiltonian._alt_convention = alt_convention
+    legacy_hamiltonian.sparse = sparse
+    legacy_trial = LegacyMeanField(
+        legacy_system, legacy_hamiltonian, beta, timestep, verbose=verbose
+    )
+
+    # Basic sanity checks on the new trial.
+    assert trial.nelec == nelec
+    numpy.testing.assert_almost_equal(trial.nav, numpy.sum(nelec), decimal=5)
+    assert trial.rho.shape == (2, nbasis, nbasis)
+    assert trial.dmat.shape == (2, nbasis, nbasis)
+    assert trial.P.shape == (2, nbasis, nbasis)
+    assert trial.G.shape == (2, nbasis, nbasis)
+
+    # Agreement with the legacy implementation.
+    numpy.testing.assert_allclose(trial.mu, legacy_trial.mu)
+    numpy.testing.assert_allclose(trial.nav, legacy_trial.nav)
+    numpy.testing.assert_allclose(trial.P, legacy_trial.P)
+    numpy.testing.assert_allclose(trial.G, legacy_trial.G)
+    numpy.testing.assert_allclose(trial.dmat, legacy_trial.dmat)
+    numpy.testing.assert_allclose(trial.dmat_inv, legacy_trial.dmat_inv)
+
+
+# Allow running this test module directly as a script, without pytest.
+if __name__ == "__main__":
+    test_mean_field()
diff --git a/ipie/addons/thermal/trial/tests/test_one_body.py b/ipie/addons/thermal/trial/tests/test_one_body.py
new file mode 100644
index 00000000..5dfe69cc
--- /dev/null
+++ b/ipie/addons/thermal/trial/tests/test_one_body.py
@@ -0,0 +1,65 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+import pytest
+
+from ipie.systems.generic import Generic
+from ipie.utils.testing import generate_hamiltonian
+from ipie.hamiltonians.generic import Generic as HamGeneric
+from ipie.addons.thermal.trial.one_body import OneBody
+
+
+@pytest.mark.unit
+def test_one_body():
+    """Build a `OneBody` trial from a random Hamiltonian and check its basics.
+
+    Verifies the stored electron numbers, that the realised average particle
+    number matches the total electron count, and the shapes of `rho`, `dmat`,
+    `P` and `G`.
+    """
+    nup = 5
+    ndown = 5
+    nelec = (nup, ndown)
+    nbasis = 10
+
+    # NOTE(review): `mu` is defined but never passed on in this test.
+    mu = -1.0
+    beta = 0.1
+    timestep = 0.01
+
+    complex_integrals = True
+    verbose = True
+
+    sym = 8
+    if complex_integrals:
+        sym = 4
+
+    # Test.
+    system = Generic(nelec)
+    h1e, chol, _, eri = generate_hamiltonian(
+        nbasis, nelec, cplx=complex_integrals, sym=sym, tol=1e-10
+    )
+    # The same one-body matrix is used for both spin blocks.
+    hamiltonian = HamGeneric(
+        h1e=numpy.array([h1e, h1e]), chol=chol.reshape((-1, nbasis**2)).T.copy(), ecore=0
+    )
+    trial = OneBody(hamiltonian, nelec, beta, timestep, verbose=verbose)
+
+    assert trial.nelec == nelec
+    numpy.testing.assert_almost_equal(trial.nav, numpy.sum(nelec), decimal=6)
+    assert trial.rho.shape == (2, nbasis, nbasis)
+    assert trial.dmat.shape == (2, nbasis, nbasis)
+    assert trial.P.shape == (2, nbasis, nbasis)
+    assert trial.G.shape == (2, nbasis, nbasis)
+
+
+# Allow running this test module directly as a script, without pytest.
+if __name__ == "__main__":
+    test_one_body()
diff --git a/ipie/addons/thermal/trial/utils.py b/ipie/addons/thermal/trial/utils.py
new file mode 100644
index 00000000..0ac49644
--- /dev/null
+++ b/ipie/addons/thermal/trial/utils.py
@@ -0,0 +1,82 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+from ipie.addons.thermal.trial.mean_field import MeanField
+from ipie.addons.thermal.trial.one_body import OneBody
+
+
+def get_trial_density_matrix(hamiltonian, nelec, beta, dt, options=None, comm=None, verbose=False):
+    """Wrapper to select trial wavefunction class.
+
+    Parameters
+    ----------
+
+    Returns
+    -------
+    trial : class or None
+        Trial density matrix class.
+    """
+    if options is None:
+        options = {}
+
+    trial_type = options.get("name", "one_body")
+    alt_convention = options.get("alt_convention", False)
+    if comm is None or comm.rank == 0:
+        if trial_type == "one_body_mod":
+            trial = OneBody(
+                hamiltonian,
+                nelec,
+                beta,
+                dt,
+                options=options,
+                H1=hamiltonian.h1e_mod,
+                verbose=verbose,
+            )
+
+        elif trial_type == "one_body":
+            trial = OneBody(
+                hamiltonian,
+                nelec,
+                beta,
+                dt,
+                options=options,
+                alt_convention=alt_convention,
+                verbose=verbose,
+            )
+
+        elif trial_type == "thermal_hartree_fock":
+            trial = MeanField(
+                hamiltonian,
+                nelec,
+                beta,
+                dt,
+                options=options,
+                alt_convention=alt_convention,
+                verbose=verbose,
+            )
+
+        else:
+            trial = None
+
+    else:
+        trial = None
+
+    if comm is not None:
+        trial = comm.bcast(trial)
+
+    return trial
diff --git a/ipie/addons/thermal/utils/__init__.py b/ipie/addons/thermal/utils/__init__.py
new file mode 100644
index 00000000..f91ef518
--- /dev/null
+++ b/ipie/addons/thermal/utils/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
diff --git a/ipie/addons/thermal/utils/legacy_testing.py b/ipie/addons/thermal/utils/legacy_testing.py
new file mode 100644
index 00000000..feb5d28b
--- /dev/null
+++ b/ipie/addons/thermal/utils/legacy_testing.py
@@ -0,0 +1,506 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+from typing import Tuple, Union
+
+import numpy
+
+from ipie.addons.thermal.qmc.options import ThermalQMCOpts
+from ipie.legacy.hamiltonians._generic import Generic as LegacyHamGeneric
+from ipie.legacy.hamiltonians.ueg import UEG as LegacyHamUEG
+from ipie.legacy.qmc.thermal_afqmc import ThermalAFQMC as LegacyThermalAFQMC
+from ipie.legacy.systems.ueg import UEG as LegacyUEG
+from ipie.legacy.thermal_propagation.continuous import Continuous
+from ipie.legacy.thermal_propagation.planewave import PlaneWave
+from ipie.legacy.trial_density_matrices.mean_field import MeanField as LegacyMeanField
+from ipie.legacy.trial_density_matrices.onebody import OneBody as LegacyOneBody
+from ipie.legacy.walkers.handler import Walkers
+from ipie.systems.generic import Generic
+from ipie.utils.mpi import MPIHandler
+
+
+def legacy_propagate_walkers(
+    legacy_hamiltonian, legacy_trial, legacy_walkers, legacy_propagator, xi=None
+):
+    if xi is None:
+        xi = [None] * len(legacy_walkers)
+
+    for iw, walker in enumerate(legacy_walkers.walkers):
+        legacy_propagator.propagate_walker(legacy_hamiltonian, walker, legacy_trial, xi=xi[iw])
+
+    return legacy_walkers
+
+
+def build_legacy_generic_test_case_handlers(
+    hamiltonian,
+    comm,
+    nelec: Tuple[int, int],
+    mu: float,
+    beta: float,
+    timestep: float,
+    nwalkers: int = 100,
+    stack_size: int = 10,
+    lowrank: bool = False,
+    lowrank_thresh: float = 1e-6,
+    stabilize_freq: int = 5,
+    pop_control_freq: int = 5,
+    pop_control_method: str = "pair_branch",
+    alt_convention: bool = False,
+    sparse: bool = False,
+    mf_trial: bool = True,
+    propagate: bool = False,
+    seed: Union[int, None] = None,
+    verbose: int = 0,
+):
+    """Assemble the legacy objects needed for a generic-Hamiltonian test case.
+
+    Seeds numpy's global RNG with `seed`, then builds, in order, a system, a
+    legacy Hamiltonian mirroring `hamiltonian` (its `H1`, `chol` and `ecore`),
+    a trial, a walker handler and a `Continuous` propagator. With
+    `propagate=True` every walker is additionally propagated once per time
+    slice of the first walker's stack.
+
+    `mf_trial` selects the legacy mean-field trial over the legacy one-body
+    trial. Here the total walker count equals `nwalkers`.
+
+    Returns
+    -------
+    dict
+        Keys "system", "trial", "hamiltonian", "walkers" and "propagator".
+    """
+    numpy.random.seed(seed)
+
+    legacy_options = {
+        "walkers": {
+            "stack_size": stack_size,
+            "low_rank": lowrank,
+            "low_rank_thresh": lowrank_thresh,
+            "pop_control": pop_control_method,
+        },
+        "propagator": {"optimised": False},
+    }
+
+    # 1. Build system.
+    legacy_system = Generic(nelec, verbose=verbose)
+    legacy_system.mu = mu
+
+    # 2. Build Hamiltonian.
+    legacy_hamiltonian = LegacyHamGeneric(
+        h1e=hamiltonian.H1, chol=hamiltonian.chol, ecore=hamiltonian.ecore
+    )
+    # Cholesky vectors reshaped to (nchol, nbasis, nbasis) for `hs_pot`.
+    legacy_hamiltonian.hs_pot = numpy.copy(hamiltonian.chol)
+    legacy_hamiltonian.hs_pot = legacy_hamiltonian.hs_pot.T.reshape(
+        (hamiltonian.nchol, hamiltonian.nbasis, hamiltonian.nbasis)
+    )
+    legacy_hamiltonian.mu = mu
+    legacy_hamiltonian._alt_convention = alt_convention
+    legacy_hamiltonian.sparse = sparse
+
+    # 3. Build trial.
+    # The one-body trial is always constructed; it is replaced by the
+    # mean-field trial when `mf_trial` is set.
+    legacy_trial = LegacyOneBody(legacy_system, legacy_hamiltonian, beta, timestep, verbose=verbose)
+    if mf_trial:
+        legacy_trial = LegacyMeanField(
+            legacy_system, legacy_hamiltonian, beta, timestep, verbose=verbose
+        )
+    # 4. Build walkers.
+    qmc_opts = ThermalQMCOpts()
+    qmc_opts.nwalkers = nwalkers
+    qmc_opts.ntot_walkers = nwalkers
+    qmc_opts.beta = beta
+    qmc_opts.nsteps = 1
+    qmc_opts.dt = timestep
+    qmc_opts.nstblz = stabilize_freq
+    qmc_opts.npop_control = pop_control_freq
+    qmc_opts.pop_control_method = pop_control_method
+    qmc_opts.seed = seed
+
+    legacy_walkers = Walkers(
+        legacy_system,
+        legacy_hamiltonian,
+        legacy_trial,
+        qmc_opts,
+        walker_opts=legacy_options["walkers"],
+        verbose=verbose,
+        comm=comm,
+    )
+
+    # 5. Build propagator.
+    legacy_propagator = Continuous(
+        legacy_options["propagator"],
+        qmc_opts,
+        legacy_system,
+        legacy_hamiltonian,
+        legacy_trial,
+        verbose=verbose,
+        lowrank=lowrank,
+    )
+
+    if propagate:
+        # One propagation sweep over all walkers per time slice.
+        for _ in range(legacy_walkers[0].stack.ntime_slices):
+            for _, walker in enumerate(legacy_walkers):
+                legacy_propagator.propagate_walker(legacy_hamiltonian, walker, legacy_trial)
+
+    legacy_objs = {
+        "system": legacy_system,
+        "trial": legacy_trial,
+        "hamiltonian": legacy_hamiltonian,
+        "walkers": legacy_walkers,
+        "propagator": legacy_propagator,
+    }
+    return legacy_objs
+
+
+def build_legacy_generic_test_case_handlers_mpi(
+    hamiltonian,
+    mpi_handler: MPIHandler,
+    nelec: Tuple[int, int],
+    mu: float,
+    beta: float,
+    timestep: float,
+    nwalkers: int = 100,
+    stack_size: int = 10,
+    lowrank: bool = False,
+    lowrank_thresh: float = 1e-6,
+    stabilize_freq: int = 5,
+    pop_control_freq: int = 5,
+    pop_control_method: str = "pair_branch",
+    alt_convention: bool = False,
+    sparse: bool = False,
+    mf_trial: bool = True,
+    propagate: bool = False,
+    seed: Union[int, None] = None,
+    verbose: int = 0,
+):
+    """MPI flavour of the legacy generic test-case builder.
+
+    Follows the same five construction steps (system, Hamiltonian, trial,
+    walkers, propagator) after seeding numpy's global RNG with `seed`, but
+    takes the communicator from `mpi_handler.comm` and sets the total walker
+    count to `nwalkers * comm.size`, with `nwalkers` the per-rank count.
+
+    Returns
+    -------
+    dict
+        Keys "system", "trial", "hamiltonian", "walkers" and "propagator".
+    """
+    numpy.random.seed(seed)
+    comm = mpi_handler.comm
+
+    legacy_options = {
+        "walkers": {
+            "stack_size": stack_size,
+            "low_rank": lowrank,
+            "low_rank_thresh": lowrank_thresh,
+            "pop_control": pop_control_method,
+        },
+        "propagator": {"optimised": False},
+    }
+
+    # 1. Build system.
+    legacy_system = Generic(nelec, verbose=verbose)
+    legacy_system.mu = mu
+
+    # 2. Build Hamiltonian.
+    legacy_hamiltonian = LegacyHamGeneric(
+        h1e=hamiltonian.H1, chol=hamiltonian.chol, ecore=hamiltonian.ecore
+    )
+    # Cholesky vectors reshaped to (nchol, nbasis, nbasis) for `hs_pot`.
+    legacy_hamiltonian.hs_pot = numpy.copy(hamiltonian.chol)
+    legacy_hamiltonian.hs_pot = legacy_hamiltonian.hs_pot.T.reshape(
+        (hamiltonian.nchol, hamiltonian.nbasis, hamiltonian.nbasis)
+    )
+    legacy_hamiltonian.mu = mu
+    legacy_hamiltonian._alt_convention = alt_convention
+    legacy_hamiltonian.sparse = sparse
+
+    # 3. Build trial.
+    # The one-body trial is always constructed; it is replaced by the
+    # mean-field trial when `mf_trial` is set.
+    legacy_trial = LegacyOneBody(legacy_system, legacy_hamiltonian, beta, timestep, verbose=verbose)
+    if mf_trial:
+        legacy_trial = LegacyMeanField(
+            legacy_system, legacy_hamiltonian, beta, timestep, verbose=verbose
+        )
+    # 4. Build walkers.
+    qmc_opts = ThermalQMCOpts()
+    qmc_opts.nwalkers = nwalkers
+    # Total across all ranks of the communicator.
+    qmc_opts.ntot_walkers = nwalkers * comm.size
+    qmc_opts.beta = beta
+    qmc_opts.nsteps = 1
+    qmc_opts.dt = timestep
+    qmc_opts.nstblz = stabilize_freq
+    qmc_opts.npop_control = pop_control_freq
+    qmc_opts.pop_control_method = pop_control_method
+    qmc_opts.seed = seed
+
+    legacy_walkers = Walkers(
+        legacy_system,
+        legacy_hamiltonian,
+        legacy_trial,
+        qmc_opts,
+        walker_opts=legacy_options["walkers"],
+        verbose=verbose,
+        comm=comm,
+    )
+
+    # 5. Build propagator.
+    legacy_propagator = Continuous(
+        legacy_options["propagator"],
+        qmc_opts,
+        legacy_system,
+        legacy_hamiltonian,
+        legacy_trial,
+        verbose=verbose,
+        lowrank=lowrank,
+    )
+
+    if propagate:
+        # One propagation sweep over all walkers per time slice.
+        for _ in range(legacy_walkers[0].stack.ntime_slices):
+            for _, walker in enumerate(legacy_walkers):
+                legacy_propagator.propagate_walker(legacy_hamiltonian, walker, legacy_trial)
+
+    legacy_objs = {
+        "system": legacy_system,
+        "trial": legacy_trial,
+        "hamiltonian": legacy_hamiltonian,
+        "walkers": legacy_walkers,
+        "propagator": legacy_propagator,
+    }
+    return legacy_objs
+
+
+def build_legacy_driver_generic_test_instance(
+    hamiltonian,
+    comm,
+    nelec: Tuple[int, int],
+    mu: float,
+    beta: float,
+    timestep: float,
+    nblocks: int,
+    nwalkers: int = 100,
+    stack_size: int = 10,
+    lowrank: bool = False,
+    lowrank_thresh: float = 1e-6,
+    stabilize_freq: int = 5,
+    pop_control_freq: int = 5,
+    pop_control_method: str = "pair_branch",
+    alt_convention: bool = False,
+    sparse: bool = False,
+    seed: Union[int, None] = None,
+    estimator_filename: Union[str, None] = None,
+    verbose: int = 0,
+):
+    """Create a legacy `ThermalAFQMC` driver for a generic Hamiltonian.
+
+    Seeds numpy's global RNG with `seed`, collects the run settings into the
+    nested options dict expected by the legacy driver, builds a legacy system,
+    a legacy Hamiltonian mirroring `hamiltonian` and a legacy mean-field trial,
+    and hands everything to `LegacyThermalAFQMC`.
+
+    `nwalkers` is the per-rank walker count; it is multiplied by `comm.size`
+    for the legacy "nwalkers" option.
+
+    Returns
+    -------
+    The constructed `LegacyThermalAFQMC` instance.
+    """
+    nup, ndown = nelec
+    numpy.random.seed(seed)
+
+    legacy_options = {
+        "qmc": {
+            "dt": timestep,
+            # Input of `nwalkers` refers to the total number of walkers in
+            # legacy `ThermalAFQMC`.
+            "nwalkers": nwalkers * comm.size,
+            "blocks": nblocks,
+            "nsteps": 1,
+            "beta": beta,
+            "stabilise_freq": stabilize_freq,
+            "pop_control_freq": pop_control_freq,
+            "pop_control_method": pop_control_method,
+            "rng_seed": seed,
+            "batched": False,
+        },
+        "propagator": {"optimised": False},
+        "walkers": {
+            "stack_size": stack_size,
+            "low_rank": lowrank,
+            "low_rank_thresh": lowrank_thresh,
+            "pop_control": pop_control_method,
+        },
+        "system": {"name": "Generic", "nup": nup, "ndown": ndown, "mu": mu},
+        "estimators": {
+            "filename": estimator_filename,
+        },
+    }
+
+    legacy_system = Generic(nelec)
+    legacy_system.mu = mu
+    legacy_hamiltonian = LegacyHamGeneric(
+        h1e=hamiltonian.H1, chol=hamiltonian.chol, ecore=hamiltonian.ecore
+    )
+    # Cholesky vectors reshaped to (nchol, nbasis, nbasis) for `hs_pot`.
+    legacy_hamiltonian.hs_pot = numpy.copy(hamiltonian.chol)
+    legacy_hamiltonian.hs_pot = legacy_hamiltonian.hs_pot.T.reshape(
+        (hamiltonian.nchol, hamiltonian.nbasis, hamiltonian.nbasis)
+    )
+    legacy_hamiltonian.mu = mu
+    legacy_hamiltonian._alt_convention = alt_convention
+    legacy_hamiltonian.sparse = sparse
+    legacy_trial = LegacyMeanField(legacy_system, legacy_hamiltonian, beta, timestep)
+
+    afqmc = LegacyThermalAFQMC(
+        comm, legacy_options, legacy_system, legacy_hamiltonian, legacy_trial, verbose=verbose
+    )
+    return afqmc
+
+
+def build_legacy_ueg_test_case_handlers(
+    comm,
+    nelec: Tuple[int, int],
+    rs: float,
+    ecut: float,
+    mu: float,
+    beta: float,
+    timestep: float,
+    nwalkers: int = 100,
+    stack_size: int = 10,
+    lowrank: bool = False,
+    lowrank_thresh: float = 1e-6,
+    propagate: bool = False,
+    stabilize_freq: int = 5,
+    pop_control_freq: int = 5,
+    pop_control_method: str = "pair_branch",
+    alt_convention: bool = False,
+    sparse: bool = False,
+    seed: Union[int, None] = None,
+    verbose: int = 0,
+):
+    """Build legacy UEG system, Hamiltonian, trial, walkers and propagator objects."""
+    numpy.random.seed(seed)
+    nup, ndown = nelec
+    ueg_opts = {
+        "nup": nup,
+        "ndown": ndown,
+        "rs": rs,
+        "ecut": ecut,
+        "thermal": True,
+        "write_integrals": False,
+        "low_rank": lowrank,
+    }
+    walker_opts = {
+        "stack_size": stack_size,
+        "low_rank": lowrank,
+        "low_rank_thresh": lowrank_thresh,
+        "pop_control": pop_control_method,
+    }
+    propagator_opts = {"optimised": False}
+
+    # 1. Legacy UEG system with the chemical potential attached.
+    legacy_system = LegacyUEG(options=ueg_opts)
+    legacy_system.mu = mu
+
+    # 2. Legacy UEG Hamiltonian, built from the same option dict.
+    legacy_hamiltonian = LegacyHamUEG(legacy_system, options=ueg_opts)
+    legacy_hamiltonian.mu = mu
+    legacy_hamiltonian._alt_convention = alt_convention
+
+    # 3. One-body trial.
+    legacy_trial = LegacyOneBody(legacy_system, legacy_hamiltonian, beta, timestep, verbose=verbose)
+
+    # 4. QMC options shared by the walkers and the propagator.
+    qmc_opts = ThermalQMCOpts()
+    qmc_opts.nwalkers = nwalkers
+    qmc_opts.ntot_walkers = nwalkers * comm.size
+    qmc_opts.beta = beta
+    qmc_opts.nsteps = 1
+    qmc_opts.dt = timestep
+    qmc_opts.nstblz = stabilize_freq
+    qmc_opts.npop_control = pop_control_freq
+    qmc_opts.pop_control_method = pop_control_method
+    qmc_opts.seed = seed
+
+    # 5. Walkers.
+    legacy_walkers = Walkers(
+        legacy_system,
+        legacy_hamiltonian,
+        legacy_trial,
+        qmc_opts,
+        walker_opts=walker_opts,
+        verbose=verbose,
+        comm=comm,
+    )
+
+    # 6. Plane-wave propagator.
+    legacy_propagator = PlaneWave(
+        legacy_system,
+        legacy_hamiltonian,
+        legacy_trial,
+        qmc_opts,
+        options=propagator_opts,
+        lowrank=lowrank,
+        verbose=verbose,
+    )
+
+    if propagate:
+        # Propagate every walker once per time slice of the first walker's stack.
+        for _ in range(legacy_walkers[0].stack.ntime_slices):
+            for walker in legacy_walkers:
+                legacy_propagator.propagate_walker(legacy_hamiltonian, walker, legacy_trial)
+
+    return {
+        "system": legacy_system,
+        "trial": legacy_trial,
+        "hamiltonian": legacy_hamiltonian,
+        "walkers": legacy_walkers,
+        "propagator": legacy_propagator,
+    }
+
+
+def build_legacy_driver_ueg_test_instance(
+    comm,
+    nelec: Tuple[int, int],
+    rs: float,
+    ecut: float,
+    mu: float,
+    beta: float,
+    timestep: float,
+    nblocks: int,
+    nwalkers: int = 100,
+    stack_size: int = 10,
+    lowrank: bool = False,
+    lowrank_thresh: float = 1e-6,
+    stabilize_freq: int = 5,
+    pop_control_freq: int = 5,
+    pop_control_method: str = "pair_branch",
+    alt_convention: bool = False,
+    sparse: bool = False,
+    seed: Union[int, None] = None,
+    estimator_filename: Union[str, None] = None,
+    verbose: int = 0,
+):
+    """Build a `LegacyThermalAFQMC` driver for a thermal UEG test system."""
+    numpy.random.seed(seed)
+    nup, ndown = nelec
+    ueg_opts = {
+        "nup": nup,
+        "ndown": ndown,
+        "rs": rs,
+        "ecut": ecut,
+        "thermal": True,
+        "write_integrals": False,
+        "low_rank": lowrank,
+    }
+    legacy_options = {
+        "ueg": ueg_opts,
+        "qmc": {
+            "dt": timestep,
+            # Legacy `ThermalAFQMC` takes the total number of walkers, so the
+            # `nwalkers` argument is scaled by the communicator size.
+            "nwalkers": nwalkers * comm.size,
+            "blocks": nblocks,
+            "nsteps": 1,
+            "beta": beta,
+            "stabilise_freq": stabilize_freq,
+            "pop_control_freq": pop_control_freq,
+            "pop_control_method": pop_control_method,
+            "rng_seed": seed,
+            "batched": False,
+        },
+        "propagator": {"optimised": False},
+        "walkers": {
+            "stack_size": stack_size,
+            "low_rank": lowrank,
+            "low_rank_thresh": lowrank_thresh,
+            "pop_control": pop_control_method,
+        },
+        "estimators": {"filename": estimator_filename},
+    }
+
+    # 1. Legacy UEG system with the chemical potential attached.
+    legacy_system = LegacyUEG(options=ueg_opts)
+    legacy_system.mu = mu
+
+    # 2. Legacy UEG Hamiltonian, built from the same option dict.
+    legacy_hamiltonian = LegacyHamUEG(legacy_system, options=ueg_opts)
+    legacy_hamiltonian.mu = mu
+    legacy_hamiltonian._alt_convention = alt_convention
+
+    # 3. One-body trial.
+    legacy_trial = LegacyOneBody(legacy_system, legacy_hamiltonian, beta, timestep, verbose=verbose)
+
+    # 4. Hand everything to the legacy Thermal AFQMC driver, which receives
+    #    the complete option dict.
+    return LegacyThermalAFQMC(
+        comm, legacy_options, legacy_system, legacy_hamiltonian, legacy_trial, verbose=verbose
+    )
diff --git a/ipie/addons/thermal/utils/testing.py b/ipie/addons/thermal/utils/testing.py
new file mode 100644
index 00000000..f828c155
--- /dev/null
+++ b/ipie/addons/thermal/utils/testing.py
@@ -0,0 +1,410 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+from typing import Tuple, Union
+
+import numpy
+
+from ipie.addons.thermal.propagation.phaseless_generic import PhaselessGeneric
+from ipie.addons.thermal.qmc.thermal_afqmc import ThermalAFQMC
+from ipie.addons.thermal.trial.mean_field import MeanField
+from ipie.addons.thermal.trial.one_body import OneBody
+from ipie.addons.thermal.utils.ueg import UEG
+from ipie.addons.thermal.walkers.uhf_walkers import UHFThermalWalkers
+from ipie.hamiltonians.generic import Generic as HamGeneric
+from ipie.utils.mpi import MPIHandler
+from ipie.utils.testing import generate_hamiltonian
+
+
+def build_generic_test_case_handlers(
+    nelec: Tuple[int, int],
+    nbasis: int,
+    mu: float,
+    beta: float,
+    timestep: float,
+    nwalkers: int = 100,
+    stack_size: int = 10,
+    lowrank: bool = False,
+    lowrank_thresh: float = 1e-6,
+    diagonal: bool = False,
+    mf_trial: bool = True,
+    propagate: bool = False,
+    complex_integrals: bool = False,
+    debug: bool = False,
+    with_eri: bool = False,
+    seed: Union[int, None] = None,
+    verbose: int = 0,
+):
+    """Build trial, Hamiltonian, walkers and propagator for random generic integrals.
+
+    Returns a dict with the keys "trial", "hamiltonian", "walkers" and
+    "propagator", plus "eri" when `with_eri` is set.
+    """
+    sym = 4 if complex_integrals else 8
+    numpy.random.seed(seed)
+
+    # 1. Random integrals; optionally keep only the diagonal of the one-body part.
+    h1e, chol, _, eri = generate_hamiltonian(
+        nbasis, nelec, cplx=complex_integrals, sym=sym, tol=1e-10
+    )
+    if diagonal:
+        h1e = numpy.diag(numpy.diag(h1e))
+
+    # 2. Hamiltonian: the same one-body matrix stacked twice, flattened Cholesky vectors.
+    chol_flat = chol.reshape((-1, nbasis**2)).T.copy()
+    hamiltonian = HamGeneric(h1e=numpy.array([h1e, h1e]), chol=chol_flat, ecore=0)
+
+    # 3. Trial: a OneBody is always built; a MeanField replaces it when requested.
+    trial = OneBody(hamiltonian, nelec, beta, timestep, verbose=verbose)
+    if mf_trial:
+        trial = MeanField(hamiltonian, nelec, beta, timestep, verbose=verbose)
+
+    # 4. Walkers.
+    walkers = UHFThermalWalkers(
+        trial,
+        nbasis,
+        nwalkers,
+        stack_size=stack_size,
+        lowrank=lowrank,
+        lowrank_thresh=lowrank_thresh,
+        verbose=verbose,
+    )
+
+    # 5. Propagator.
+    propagator = PhaselessGeneric(timestep, mu, lowrank=lowrank, verbose=verbose)
+    propagator.build(hamiltonian, trial=trial, walkers=walkers, verbose=verbose)
+
+    if propagate:
+        # One propagation step per slice (`nslice`) of the first stack.
+        for _ in range(walkers.stack[0].nslice):
+            propagator.propagate_walkers(walkers, hamiltonian, trial, debug=debug)
+
+    objs = {
+        "trial": trial,
+        "hamiltonian": hamiltonian,
+        "walkers": walkers,
+        "propagator": propagator,
+    }
+    if with_eri:
+        objs["eri"] = eri
+
+    return objs
+
+
+def build_generic_test_case_handlers_mpi(
+    nelec: Tuple[int, int],
+    nbasis: int,
+    mu: float,
+    beta: float,
+    timestep: float,
+    mpi_handler: MPIHandler,
+    nwalkers: int = 100,
+    stack_size: int = 10,
+    lowrank: bool = False,
+    lowrank_thresh: float = 1e-6,
+    diagonal: bool = False,
+    mf_trial: bool = True,
+    propagate: bool = False,
+    complex_integrals: bool = False,
+    debug: bool = False,
+    seed: Union[int, None] = None,
+    verbose: int = 0,
+):
+    """Build trial, Hamiltonian, walkers and propagator, passing on `mpi_handler`.
+
+    Returns a dict with the keys "trial", "hamiltonian", "walkers" and
+    "propagator".
+    """
+    sym = 4 if complex_integrals else 8
+    numpy.random.seed(seed)
+
+    # 1. Random integrals; optionally keep only the diagonal of the one-body part.
+    h1e, chol, _, _ = generate_hamiltonian(
+        nbasis, nelec, cplx=complex_integrals, sym=sym, tol=1e-10
+    )
+    if diagonal:
+        h1e = numpy.diag(numpy.diag(h1e))
+
+    # 2. Hamiltonian: the same one-body matrix stacked twice, flattened Cholesky vectors.
+    chol_flat = chol.reshape((-1, nbasis**2)).T.copy()
+    hamiltonian = HamGeneric(h1e=numpy.array([h1e, h1e]), chol=chol_flat, ecore=0)
+
+    # 3. Trial: a OneBody is always built; a MeanField replaces it when requested.
+    trial = OneBody(hamiltonian, nelec, beta, timestep, verbose=verbose)
+    if mf_trial:
+        trial = MeanField(hamiltonian, nelec, beta, timestep, verbose=verbose)
+
+    # 4. Walkers, given the MPI handler.
+    walkers = UHFThermalWalkers(
+        trial,
+        nbasis,
+        nwalkers,
+        stack_size=stack_size,
+        lowrank=lowrank,
+        lowrank_thresh=lowrank_thresh,
+        mpi_handler=mpi_handler,
+        verbose=verbose,
+    )
+
+    # 5. Propagator, built with the same MPI handler.
+    propagator = PhaselessGeneric(timestep, mu, lowrank=lowrank, verbose=verbose)
+    propagator.build(
+        hamiltonian, trial=trial, walkers=walkers, mpi_handler=mpi_handler, verbose=verbose
+    )
+
+    if propagate:
+        # One propagation step per slice (`nslice`) of the first stack.
+        for _ in range(walkers.stack[0].nslice):
+            propagator.propagate_walkers(walkers, hamiltonian, trial, debug=debug)
+
+    return {
+        "trial": trial,
+        "hamiltonian": hamiltonian,
+        "walkers": walkers,
+        "propagator": propagator,
+    }
+
+
+def build_driver_generic_test_instance(
+    nelec: Tuple[int, int],
+    nbasis: int,
+    mu: float,
+    beta: float,
+    timestep: float,
+    nblocks: int,
+    nwalkers: int = 100,
+    stack_size: int = 10,
+    lowrank: bool = False,
+    lowrank_thresh: float = 1e-6,
+    stabilize_freq: int = 5,
+    pop_control_freq: int = 5,
+    pop_control_method: str = "pair_branch",
+    diagonal: bool = False,
+    complex_integrals: bool = False,
+    debug: bool = False,
+    seed: Union[int, None] = None,
+    verbose: int = 0,
+):
+    """Build a `ThermalAFQMC` driver on top of randomly generated generic integrals.
+
+    The trial is always a `MeanField`; it is constructed without the `verbose` flag.
+    """
+    sym = 4 if complex_integrals else 8
+    numpy.random.seed(seed)
+
+    # 1. Random integrals; optionally keep only the diagonal of the one-body part.
+    h1e, chol, _, _ = generate_hamiltonian(
+        nbasis, nelec, cplx=complex_integrals, sym=sym, tol=1e-10
+    )
+    if diagonal:
+        h1e = numpy.diag(numpy.diag(h1e))
+
+    # 2. Hamiltonian: the same one-body matrix stacked twice, flattened Cholesky vectors.
+    chol_flat = chol.reshape((-1, nbasis**2)).T.copy()
+    hamiltonian = HamGeneric(h1e=numpy.array([h1e, h1e]), chol=chol_flat, ecore=0)
+
+    # 3. Mean-field trial.
+    trial = MeanField(hamiltonian, nelec, beta, timestep)
+
+    # 4. Thermal AFQMC driver; the remaining settings are forwarded unchanged
+    #    as keyword arguments.
+    return ThermalAFQMC.build(
+        nelec,
+        mu,
+        beta,
+        hamiltonian,
+        trial,
+        nwalkers=nwalkers,
+        stack_size=stack_size,
+        seed=seed,
+        nblocks=nblocks,
+        timestep=timestep,
+        stabilize_freq=stabilize_freq,
+        pop_control_freq=pop_control_freq,
+        pop_control_method=pop_control_method,
+        lowrank=lowrank,
+        lowrank_thresh=lowrank_thresh,
+        debug=debug,
+        verbose=verbose,
+    )
+
+
+def build_ueg_test_case_handlers(
+    nelec: Tuple[int, int],
+    rs: float,
+    ecut: float,
+    mu: float,
+    beta: float,
+    timestep: float,
+    nwalkers: int = 100,
+    stack_size: int = 10,
+    lowrank: bool = False,
+    lowrank_thresh: float = 1e-6,
+    propagate: bool = False,
+    debug: bool = False,
+    seed: Union[int, None] = None,
+    verbose: int = 0,
+):
+    """Build trial, Hamiltonian, walkers and propagator for a thermal UEG system.
+
+    Returns a dict with the keys "trial", "hamiltonian", "walkers" and
+    "propagator".
+    """
+    nup, ndown = nelec
+    ueg_opts = {
+        "nup": nup,
+        "ndown": ndown,
+        "rs": rs,
+        "ecut": ecut,
+        "thermal": True,
+        "write_integrals": False,
+        "low_rank": lowrank,
+    }
+    numpy.random.seed(seed)
+
+    # 1. UEG integrals.
+    ueg = UEG(ueg_opts, verbose=verbose)
+    ueg.build(verbose=verbose)
+    nbasis, nchol = ueg.nbasis, ueg.nchol
+    if verbose:
+        print(f"# nbasis = {nbasis}")
+        print(f"# nchol = {nchol}")
+        print(f"# nup = {nup}")
+        print(f"# ndown = {ndown}")
+
+    # 2. Hamiltonian from the first one-body block and the Cholesky vectors times 2.
+    h1 = ueg.H1[0]
+    chol = 2.0 * ueg.chol_vecs.toarray().copy()
+    ecore = 0.0
+    hamiltonian = HamGeneric(
+        numpy.array([h1, h1], dtype=numpy.complex128),
+        numpy.array(chol, dtype=numpy.complex128),
+        ecore,
+        verbose=verbose,
+    )
+
+    # 3. One-body trial.
+    trial = OneBody(hamiltonian, nelec, beta, timestep, verbose=verbose)
+
+    # 4. Walkers.
+    walkers = UHFThermalWalkers(
+        trial,
+        nbasis,
+        nwalkers,
+        stack_size=stack_size,
+        lowrank=lowrank,
+        lowrank_thresh=lowrank_thresh,
+        verbose=verbose,
+    )
+
+    # 5. Propagator; optionally advance once per slice of the first stack.
+    propagator = PhaselessGeneric(timestep, mu, lowrank=lowrank, verbose=verbose)
+    propagator.build(hamiltonian, trial=trial, walkers=walkers, verbose=verbose)
+
+    if propagate:
+        for _ in range(walkers.stack[0].nslice):
+            propagator.propagate_walkers(walkers, hamiltonian, trial, debug=debug)
+
+    return {
+        "trial": trial,
+        "hamiltonian": hamiltonian,
+        "walkers": walkers,
+        "propagator": propagator,
+    }
+
+
+def build_driver_ueg_test_instance(
+    nelec: Tuple[int, int],
+    rs: float,
+    ecut: float,
+    mu: float,
+    beta: float,
+    timestep: float,
+    nblocks: int,
+    nwalkers: int = 100,
+    stack_size: int = 10,
+    lowrank: bool = False,
+    lowrank_thresh: float = 1e-6,
+    stabilize_freq: int = 5,
+    pop_control_freq: int = 5,
+    pop_control_method: str = "pair_branch",
+    debug: bool = False,
+    seed: Union[int, None] = None,
+    verbose: int = 0,
+):
+    """Build a `ThermalAFQMC` driver for a thermal UEG test system.
+
+    The trial is a `OneBody` built on a `HamGeneric` made from the UEG integrals.
+    """
+    nup, ndown = nelec
+    ueg_opts = {
+        "nup": nup,
+        "ndown": ndown,
+        "rs": rs,
+        "ecut": ecut,
+        "thermal": True,
+        "write_integrals": False,
+        "low_rank": lowrank,
+    }
+    numpy.random.seed(seed)
+
+    # 1. UEG integrals.
+    ueg = UEG(ueg_opts, verbose=verbose)
+    ueg.build(verbose=verbose)
+    nbasis, nchol = ueg.nbasis, ueg.nchol
+    if verbose:
+        print(f"# nbasis = {nbasis}")
+        print(f"# nchol = {nchol}")
+        print(f"# nup = {nup}")
+        print(f"# ndown = {ndown}")
+
+    # 2. Hamiltonian from the first one-body block and the Cholesky vectors times 2.
+    h1 = ueg.H1[0]
+    chol = 2.0 * ueg.chol_vecs.toarray().copy()
+    ecore = 0.0
+    hamiltonian = HamGeneric(
+        numpy.array([h1, h1], dtype=numpy.complex128),
+        numpy.array(chol, dtype=numpy.complex128),
+        ecore,
+        verbose=verbose,
+    )
+
+    # 3. One-body trial.
+    trial = OneBody(hamiltonian, nelec, beta, timestep, verbose=verbose)
+
+    # 4. Thermal AFQMC driver; the remaining settings are forwarded unchanged
+    #    as keyword arguments.
+    return ThermalAFQMC.build(
+        nelec,
+        mu,
+        beta,
+        hamiltonian,
+        trial,
+        nwalkers=nwalkers,
+        stack_size=stack_size,
+        seed=seed,
+        nblocks=nblocks,
+        timestep=timestep,
+        stabilize_freq=stabilize_freq,
+        pop_control_freq=pop_control_freq,
+        pop_control_method=pop_control_method,
+        lowrank=lowrank,
+        lowrank_thresh=lowrank_thresh,
+        debug=debug,
+        verbose=verbose,
+    )
diff --git a/ipie/addons/thermal/utils/ueg.py b/ipie/addons/thermal/utils/ueg.py
new file mode 100644
index 00000000..a1a9e1e3
--- /dev/null
+++ b/ipie/addons/thermal/utils/ueg.py
@@ -0,0 +1,547 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+import scipy.sparse
+from ipie.utils.io import write_qmcpack_sparse
+
+
+class UEG:
+    """Uniform electron gas (UEG) system class (integrals built in a plane-wave basis)
+
+    Parameters
+    ----------
+    nup : int
+        Number of up electrons.
+
+    ndown : int
+        Number of down electrons.
+
+    rs : float
+        Density parameter.
+
+    ecut : float
+        Scaled cutoff energy.
+
+    ktwist : :class:`numpy.ndarray`
+        Twist vector.
+
+    verbose : bool
+        Print extra information.
+
+    Attributes
+    ----------
+    T : :class:`numpy.ndarray`
+        One-body part of the Hamiltonian. This is diagonal in plane wave basis.
+
+    ecore : float
+        Madelung contribution to the total energy.
+
+    h1e_mod : :class:`numpy.ndarray`
+        Modified one-body Hamiltonian.
+
+    nfields : int
+        Number of field configurations per walker for back propagation.
+
+    basis : :class:`numpy.ndarray`
+        Basis vectors within a cutoff.
+
+    kfac : float
+        Scale factor (2pi/L).
+    """
+
+    def __init__(self, options, verbose=False):
+        """Read the UEG options and derive box geometry and energy scales from them."""
+        if verbose:
+            print("# Parsing input options.")
+        self.name = "UEG"
+        # Main options (None when absent) plus the optional twist vector.
+        self.nup = options.get("nup")
+        self.ndown = options.get("ndown")
+        self.nelec = (self.nup, self.ndown)
+        self.rs = options.get("rs")
+        self.ecut = options.get("ecut")
+        self.ktwist = numpy.array(options.get("ktwist", [0, 0, 0])).reshape(3)
+        # Optional flags, all defaulting to False.
+        self.thermal = options.get("thermal", False)
+        self._alt_convention = options.get("alt_convention", False)
+        self.write_ints = options.get("write_integrals", False)
+        # Settings that are fixed for this class.
+        self.sparse = True
+        self.control_variate = False
+        self.diagH1 = True
+        # Total # of electrons.
+        self.ne = self.nup + self.ndown
+        # Spin polarisation.
+        self.zeta = (self.nup - self.ndown) / self.ne
+        # Density.
+        self.rho = ((4.0 * numpy.pi) / 3.0 * self.rs**3.0) ** (-1.0)
+        # Box length.
+        self.L = self.rs * (4.0 * self.ne * numpy.pi / 3.0) ** (1 / 3.0)
+        # Volume.
+        self.vol = self.L**3.0
+        # k-space grid spacing.
+        self.kfac = 2 * numpy.pi / self.L
+        # Fermi wavevector (infinite system).
+        self.kf = (3 * (self.zeta + 1) * numpy.pi**2 * self.ne / self.L**3) ** (1 / 3.0)
+        # Fermi energy (infinite system).
+        self.ef = 0.5 * self.kf**2
+        # Core energy.
+        self.ecore = 0.5 * self.ne * self.madelung()
+
+        if not verbose:
+            return
+        if self.thermal:
+            print("# Thermal UEG activated.")
+        print(f"# Number of spin-up electrons: {self.nup:d}")
+        print(f"# Number of spin-down electrons: {self.ndown:d}")
+        print(f"# rs: {self.rs:6.4e}")
+        print(f"# Spin polarisation (zeta): {self.zeta:6.4e}")
+        print(f"# Electron density (rho): {self.rho:13.8e}")
+        print(f"# Box Length (L): {self.L:13.8e}")
+        print(f"# Volume: {self.vol:13.8e}")
+        print(f"# k-space factor (2pi/L): {self.kfac:13.8e}")
+
+    def build(self, verbose=False):
+        """Construct the plane-wave basis, lookup table, one-body terms and two-body potentials."""
+        self.sp_eigv, self.basis, self.nmax = self.sp_energies(self.ktwist, self.kfac, self.ecut)
+        self.shifted_nmax = 2 * self.nmax
+        self.imax_sq = numpy.dot(self.basis[-1], self.basis[-1])
+        self.create_lookup_table()
+        # Consistency check: each basis vector must look up its own position.
+        for i, k in enumerate(self.basis):
+            assert i == self.lookup_basis(k)
+
+        # Number of plane waves.
+        self.nbasis = len(self.sp_eigv)
+        self.nactive = self.nbasis
+        self.ncore = 0
+        self.nfv = 0
+        self.mo_coeff = None
+
+        # One-body terms: diagonal matrix of eigenvalues and its modified counterpart.
+        T = numpy.diag(self.sp_eigv)
+        h1e_mod = self.mod_one_body(T)
+        self.H1 = numpy.array([T, T])  # Making alpha and beta.
+        self.h1e_mod = numpy.array([h1e_mod, h1e_mod])
+
+        # Momentum transfers: rerun the basis enumeration with a larger cutoff.
+        # Allowed momentum transfers (4*ecut).
+        _, qvecs, self.qnmax = self.sp_energies(self.ktwist, self.kfac, 4 * self.ecut)
+
+        # Omit Q = 0 term.
+        self.qvecs = numpy.copy(qvecs[1:])
+        self.vqvec = numpy.array([self.vq(self.kfac * q) for q in self.qvecs])
+
+        # Number of momentum transfer vectors / auxiliary fields.
+        # Can reduce by symmetry but be stupid for the moment.
+        self.nchol = len(self.qvecs)
+        self.nfields = 2 * len(self.qvecs)
+        self.get_momentum_transfers()
+
+        if verbose:
+            print(f"# Number of plane waves: {self.nbasis:d}")
+            print(f"# Number of Cholesky vectors: {self.nchol:d}.")
+            print(f"# Number of auxiliary fields: {self.nfields:d}.")
+            print("# Constructing two-body potentials incore.")
+
+        # Two-body potentials; the iA and iB operators are stored alongside.
+        self.chol_vecs, self.iA, self.iB = self.two_body_potentials_incore()
+
+        if self.write_ints:
+            self.write_integrals()
+
+        if verbose:
+            print(
+                "# Approximate memory required for "
+                "two-body potentials: {:13.8e} GB.".format((3 * self.iA.nnz * 16 / (1024**3)))
+            )
+            print("# Finished constructing two-body potentials.")
+            print("# Finished building UEG object.")
+
+    def sp_energies(self, ks, kfac, ecut):
+        """Calculate the allowed kvectors and resulting single particle eigenvalues
+        (basically kinetic energy) which can fit in the sphere in kspace determined
+        by ecut.
+
+        Parameters
+        ----------
+        ks : :class:`numpy.ndarray`
+            twist vector; added to each integer kvector in the eigenvalue only.
+        kfac : float
+            kspace grid spacing.
+        ecut : float
+            energy cutoff, compared against 0.5 * |n|^2 of the integer kvector n.
+
+        Returns
+        -------
+        spval : :class:`numpy.ndarray`
+            Array containing sorted single particle eigenvalues.
+        kval : :class:`numpy.ndarray`
+            Array containing basis vectors, sorted according to their
+            corresponding single-particle energy.
+        nmax : int
+            ceil(sqrt(2 * ecut)), the largest integer component that is scanned.
+        """
+        # Scaled Units to match with HANDE.
+        # So ecut is measured in units of 1/kfac^2.
+        nmax = int(numpy.ceil(numpy.sqrt((2 * ecut))))
+        grid = range(-nmax, nmax + 1)
+
+        eigenvalues = []
+        vectors = []
+        # Scan the cube [-nmax, nmax]^3 and keep the vectors inside the cutoff.
+        for ni in grid:
+            for nj in grid:
+                for nk in grid:
+                    if 0.5 * (ni**2 + nj**2 + nk**2) <= ecut:
+                        kijk = [ni, nj, nk]
+                        twisted = numpy.array(kijk) + ks
+                        # Reintroduce 2 \pi / L factor.
+                        eigenvalues.append(kfac**2 * (0.5 * numpy.dot(twisted, twisted)))
+                        vectors.append(kijk)
+
+        # Stable sort: vectors with equal energy keep their enumeration order.
+        eigenvalues = numpy.array(eigenvalues)
+        order = numpy.argsort(eigenvalues, kind="mergesort")
+        spval = eigenvalues[order]
+        kval = numpy.array(vectors)[order]
+        return spval, kval, nmax
+
+    def create_lookup_table(self):
+        """Build `self.lookup`, mapping flattened grid indices to basis positions."""
+        flat_ix = [self.map_basis_to_index(k) for k in self.basis]
+        largest = max(flat_ix)
+        # Slots that are never written stay 0, so the table alone cannot tell a
+        # missing vector from basis position 0.
+        table = numpy.zeros(largest + 1, dtype=int)
+        for position, ix in enumerate(flat_ix):
+            table[ix] = position
+        self.lookup = table
+        self.max_ix = largest
+
+    def lookup_basis(self, vec):
+        """Return the basis position of the integer vector `vec`, or None.
+
+        None is returned when `vec` is longer than the last basis vector or when
+        its flattened index lies beyond the end of the lookup table.
+        """
+        # `imax_sq` is the squared length of the last basis vector (set in `build`).
+        if not numpy.dot(vec, vec) <= self.imax_sq:
+            return None
+
+        ix = self.map_basis_to_index(vec)
+        if ix >= len(self.lookup):
+            return None
+        return self.lookup[ix]
+
+    def map_basis_to_index(self, k):
+        """Flatten the integer grid vector `k` into a single lookup-table index."""
+        stride = self.shifted_nmax
+        shifted = [k[0] + self.nmax, k[1] + self.nmax, k[2] + self.nmax]
+        # The three shifted components are weighted by 1, stride and stride**2.
+        return shifted[0] + stride * shifted[1] + stride * stride * shifted[2]
+
+    def get_momentum_transfers(self):
+        """Get arrays of plane wave basis vectors connected by momentum transfers Q.
+
+        For every q in `self.qvecs` (position iq) the following int64 arrays are
+        stored on the instance:
+
+        ikpq_i[iq], ikpq_kpq[iq]
+            Indices i and lookup_basis(k_i + q) of all scanned basis vectors k_i
+            whose partner k_i + q is resolved by `lookup_basis`.
+        ipmq_i[iq], ipmq_pmq[iq]
+            The same for the partner k_i - q.
+
+        Only the first `self.nup` basis vectors are scanned unless `self.thermal`
+        is set, in which case the first `self.nbasis` vectors are scanned.
+        """
+        # Number of leading basis vectors that are scanned.
+        nlimit = self.nbasis if self.thermal else self.nup
+        scanned = self.basis[0:nlimit]
+
+        def resolve(shifted_vectors):
+            # Keep (i, partner index) for every shifted vector found in the basis.
+            kept_i = []
+            kept_partner = []
+            for i, shifted in enumerate(shifted_vectors):
+                partner = self.lookup_basis(shifted)
+                if partner is not None:
+                    kept_i.append(i)
+                    kept_partner.append(partner)
+            return (
+                numpy.array(kept_i, dtype=numpy.int64),
+                numpy.array(kept_partner, dtype=numpy.int64),
+            )
+
+        self.ikpq_i = []
+        self.ikpq_kpq = []
+        self.ipmq_i = []
+        self.ipmq_pmq = []
+
+        # One pair of index arrays per momentum transfer and per sign of q.
+        for q in self.qvecs:
+            # Partners reached by adding q.
+            plus_i, plus_partner = resolve(k + q for k in scanned)
+            self.ikpq_i.append(plus_i)
+            self.ikpq_kpq.append(plus_partner)
+            # Partners reached by subtracting q.
+            minus_i, minus_partner = resolve(p - q for p in scanned)
+            self.ipmq_i.append(minus_i)
+            self.ipmq_pmq.append(minus_partner)
+
+    def madelung(self):
+        """Use expression in Schoof et al. (PhysRevLett.115.130402) for the
+        Madelung contribution to the total energy fitted to L.M. Fraser et al.
+        Phys. Rev. B 53, 1814.
+
+        Takes no arguments; the Wigner-Seitz radius and the number of electrons
+        are read from `self.rs` and `self.ne`.
+
+        Returns
+        -------
+        v_M: float
+            Madelung potential (in Hartrees).
+        """
+        # Fit constant and the factor (3 / (4 pi))^(1/3).
+        fit_constant = -2.837297
+        geometric = (3.0 / (4.0 * numpy.pi)) ** (1.0 / 3.0)
+
+        # Size dependence: ne^(1/3) * rs.
+        size_scale = self.ne ** (1.0 / 3.0) * self.rs
+
+        return fit_constant * geometric / size_scale
+
+    def mod_one_body(self, T):
+        """Absorb the diagonal term of the two-body Hamiltonian to the one-body term.
+        Essentially adding the third term in Eq.(11b) of Phys. Rev. B 75, 245123.
+
+        Parameters
+        ----------
+        T : :class:`numpy.ndarray`
+            one-body Hamiltonian (i.e. kinetic energy); it is copied, not modified.
+
+        Returns
+        -------
+        h1e_mod : :class:`numpy.ndarray`
+            copy of `T` whose diagonal entry i is lowered by
+            vq(kfac * (k_i - k_j)) / (2 * vol) for every basis vector j != i.
+        """
+        h1e_mod = numpy.copy(T)
+        prefactor = 1.0 / (2.0 * self.vol)
+
+        for i, ki in enumerate(self.basis):
+            for j, kj in enumerate(self.basis):
+                if i == j:
+                    continue  # skip the i == j (zero momentum difference) term
+                h1e_mod[i, i] -= prefactor * self.vq(self.kfac * (ki - kj))
+        return h1e_mod
+
+    def vq(self, q):
+        """The typical 3D Coulomb kernel, 4 * pi / |q|^2.
+
+        Parameters
+        ----------
+        q : :class:`numpy.ndarray`
+            A plane-wave vector; q = 0 leads to a division by zero.
+
+        Returns
+        -------
+        Coulomb kernel evaluated at `q` (in Hartrees).
+        """
+        q_squared = numpy.dot(q, q)
+        return 4 * numpy.pi / q_squared
+
+    def scaled_density_operator_incore(self, transpose):
+        """Density operator as defined in Eq.(6) of PRB(75)245123
+
+        One sparse column is assembled per momentum transfer in `self.qvecs`.
+        For a given q, every basis vector k_i whose partner k_i + q is resolved
+        by `lookup_basis` (position j) contributes one entry holding the factor
+        sqrt(pi / (vol * |kfac * q|^2)), which depends on q only.
+
+        Parameters
+        ----------
+        transpose : bool
+            Chooses how the pair (i, j) is flattened into a row index:
+            `j * nbasis + i` when false and `j + i * nbasis` when true.
+
+        Returns
+        -------
+        rho_q : :class:`scipy.sparse.csc_matrix`
+            complex128 matrix of shape (nbasis * nbasis, len(self.qvecs)).
+            Column iq belongs to `self.qvecs[iq]`.
+
+        Notes
+        -----
+        `two_body_potentials_incore` calls this method with both values of
+        `transpose` and combines the two matrices.
+
+        Reads `self.qvecs`, `self.basis`, `self.nbasis`, `self.kfac` and
+        `self.vol`.
+        """
+        nq = len(self.qvecs)
+
+        # Step 1: for each q, collect the connected pairs (i, j) with
+        # j = lookup_basis(basis[i] + q); unresolved partners are dropped.
+        # The pairs do not depend on `transpose`.
+        connected = []
+        for q in self.qvecs:
+            pairs = []
+            for i, k in enumerate(self.basis):
+                j = self.lookup_basis(k + q)
+                if j is not None:
+                    pairs.append((i, j))
+            connected.append(pairs)
+
+        # Step 2: turn the pairs into (value, row, column) triplets.
+        row_index = []
+        col_index = []
+        values = []
+
+        for iq, pairs in enumerate(connected):
+            qscaled = self.kfac * self.qvecs[iq]
+            # NOTE(review): the prefactor used here is pi / vol. The comment this
+            # replaces attributed a "pi / 2*vol as opposed to 2*pi / vol" choice
+            # to the HS transformation; confirm which expression is intended.
+            piovol = numpy.pi / (self.vol)
+            # The factor is the same for every pair belonging to this q.
+            factor = (piovol / numpy.dot(qscaled, qscaled)) ** 0.5
+
+            # `transpose` swaps which index of the pair is the slow one.
+            for i, j in pairs:
+                if transpose:
+                    row = j + i * self.nbasis
+                else:
+                    row = j * self.nbasis + i
+                row_index.append(row)
+                col_index.append(iq)
+                values.append(factor)
+
+        # Step 3: build the sparse matrix; repeated (row, column) entries, if
+        # any, are summed by the constructor.
+        rho_q = scipy.sparse.csc_matrix(
+            (values, (row_index, col_index)),
+            shape=(self.nbasis * self.nbasis, nq),
+            dtype=numpy.complex128,
+        )
+        return rho_q
+
+    def two_body_potentials_incore(self):
+        """Calculate A and B of Eq.(13) of PRB(75)245123 for all q in self.qvecs
+
+        Returns
+        -------
+        rho_q : sparse matrix
+            scaled_density_operator_incore(False)
+        iA : sparse matrix
+            Eq.(13a), formed as 1j * (rho_q + rho_qH)
+        iB : sparse matrix
+            Eq.(13b), formed as -(rho_q - rho_qH)
+        """
+        rho_q = self.scaled_density_operator_incore(False)
+        rho_qH = self.scaled_density_operator_incore(True)
+        sum_term, diff_term = rho_q + rho_qH, rho_q - rho_qH
+        return (rho_q, 1j * sum_term, -diff_term)
+
+    def hijkl(self, i, j, k, l):
+        """Compute <ij|kl> = (ik|jl) = 1/Omega * 4pi/(kk-ki)**2
+
+        The integral is nonzero only when momentum is conserved,
+        k_i + k_j = k_k + k_l (equivalently k_k - k_i = k_j - k_l), and the
+        momentum transfer k_k - k_i is itself nonzero.
+
+        Parameters
+        ----------
+        i, j, k, l : int
+            Orbital indices for integral (ik|jl) = <ij|kl>.
+
+        Returns
+        -------
+        integral : float
+            (ik|jl), or 0.0 when either condition above fails.
+        """
+        transfer = self.basis[k] - self.basis[i]
+        mismatch = transfer - (self.basis[j] - self.basis[l])
+
+        if not (numpy.dot(transfer, transfer) > 1e-12 and numpy.dot(mismatch, mismatch) < 1e-12):
+            return 0.0
+
+        return 1.0 / self.vol * self.vq(self.kfac * transfer)
+
+    def compute_real_transformation(self):
+        """Build the matrix U that couples each basis vector b with -b.
+
+        A zero basis vector gets U[i, i] = 1. For every pair (i, idx) with
+        basis[idx] == -basis[i] and i < idx, the 2x2 block
+        [[1, 1], [-1j, 1j]] / sqrt(2) is written; the transpose is returned.
+        """
+        root2 = numpy.sqrt(2.0)
+        U22 = numpy.array(
+            [[1.0 / root2, 1.0 / root2], [-1.0j / root2, 1.0j / root2]],
+            dtype=numpy.complex128,
+        )
+        U = numpy.zeros((self.nbasis, self.nbasis), dtype=numpy.complex128)
+
+        for i, b in enumerate(self.basis):
+            if numpy.sum(b * b) == 0:
+                U[i, i] = 1.0
+                continue
+
+            # Squared distance of every basis vector from -b; exactly one
+            # of them is required to coincide with -b.
+            diff = numpy.einsum("ij->i", (self.basis - (-b)) ** 2)
+            matches = numpy.argwhere(diff == 0).ravel()
+            assert matches.shape[0] == 1
+
+            idx = matches[0]
+            if i < idx:  # fill each (b, -b) block once, from its lower index
+                U[i, i], U[i, idx] = U22[0, 0], U22[0, 1]
+                U[idx, i], U[idx, idx] = U22[1, 0], U22[1, 1]
+
+        return U.T.copy()
+
+    def eri_4(self):
+        """Real part of 4 * chol_vecs.dot(chol_vecs.T) as an (nbasis,) * 4 tensor."""
+        nb = self.nbasis
+        product = 4 * self.chol_vecs.dot(self.chol_vecs.T)
+        dense = product.toarray().reshape((nb, nb, nb, nb)).real
+        # The last two axes of the reshaped tensor are exchanged before returning.
+        return dense.transpose(0, 1, 3, 2)
+
+    def eri_8(self):
+        """Compute 8-fold symmetric integrals. Useful for running standard
+        quantum chemistry methods. Each index of eri_4() is rotated by U."""
+        U = self.compute_real_transformation()
+        Uc = U.conj()
+        step = numpy.einsum("mp,mnls->pnls", Uc, self.eri_4(), optimize=True)
+        step = numpy.einsum("nq,pnls->pqls", U, step, optimize=True)
+        step = numpy.einsum("lr,pqls->pqrs", Uc, step, optimize=True)
+        # Only the real part of the fully transformed tensor is returned.
+        return numpy.einsum("st,pqrs->pqrt", U, step, optimize=True).real
+
+    def write_integrals(self, filename="ueg_integrals.h5"):
+        """Write H1[0] and the dense 2 * chol_vecs through write_qmcpack_sparse.
+
+        enuc is fixed to 0.0; filename is forwarded unchanged.
+        """
+        dense_chol = 2 * self.chol_vecs.toarray()
+        write_qmcpack_sparse(
+            self.H1[0], dense_chol, self.nelec, self.nbasis, enuc=0.0, filename=filename
+        )
diff --git a/ipie/addons/thermal/walkers/__init__.py b/ipie/addons/thermal/walkers/__init__.py
new file mode 100644
index 00000000..f91ef518
--- /dev/null
+++ b/ipie/addons/thermal/walkers/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
diff --git a/ipie/addons/thermal/walkers/pop_controller.py b/ipie/addons/thermal/walkers/pop_controller.py
new file mode 100644
index 00000000..8725044f
--- /dev/null
+++ b/ipie/addons/thermal/walkers/pop_controller.py
@@ -0,0 +1,459 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+
+from ipie.config import MPI
+from ipie.utils.backend import arraylib as xp
+from ipie.walkers.pop_controller import PopController, PopControllerTimer
+
+
+class ThermalPopController(PopController):
+    """PopController whose branching routines also transfer each walker's stack buffer."""
+
+    def __init__(
+        self,
+        num_walkers_local,
+        num_steps,
+        mpi_handler=None,
+        pop_control_method="pair_branch",
+        min_weight=0.1,
+        max_weight=4,
+        reconfiguration_freq=50,
+        verbose=False,
+    ):
+        super().__init__(
+            num_walkers_local, num_steps, mpi_handler, pop_control_method,
+            min_weight, max_weight, reconfiguration_freq, verbose
+        )
+
+    def pop_control(self, walkers, comm):
+        """Rescale walkers.weight by total_weight / target_weight, then branch.
+
+        No-op when ntot_walkers == 1; raises ValueError if the summed |weight| is below 1e-8.
+        """
+        self.timer.start_time()
+        if self.ntot_walkers == 1:
+            return
+        weights = numpy.abs(xp.array(walkers.weight))
+        global_weights = numpy.empty(len(weights) * comm.size)
+        self.timer.add_non_communication()
+        self.timer.start_time()
+        if self.method == "comb":
+            comm.Allgather(weights, global_weights)
+            total_weight = sum(global_weights)
+        else:
+            sum_weights = numpy.sum(weights)
+            total_weight = numpy.empty(1, dtype=numpy.float64)
+            if hasattr(sum_weights, "get"):
+                sum_weights = sum_weights.get()
+            comm.Reduce(sum_weights, total_weight, op=MPI.SUM, root=0)
+            comm.Bcast(total_weight, root=0)
+            total_weight = total_weight[0]
+
+        self.timer.add_communication()
+        self.timer.start_time()
+
+        # Rescale weights to combat exponential decay/growth.
+        scale = total_weight / self.target_weight
+        if total_weight < 1e-8:
+            if comm.rank == 0:
+                print(f"# Warning: Total weight is {total_weight:13.8e}")
+                print("# Something is seriously wrong.")
+            raise ValueError(f"Total weight is {total_weight:13.8e}")  # every rank raises
+        self.total_weight = total_weight
+        # Todo: Just standardise information we want to send between routines.
+        walkers.unscaled_weight = walkers.weight
+        walkers.weight = walkers.weight / scale
+        if self.method == "comb":
+            global_weights = global_weights / scale
+            self.timer.add_non_communication()
+            comb(walkers, comm, global_weights, self.target_weight, self.timer)
+        elif self.method == "pair_branch":
+            pair_branch(walkers, comm, self.max_weight, self.min_weight, self.timer)
+        elif self.method == "stochastic_reconfiguration":
+            self.reconfiguration_counter += 1
+            if self.reconfiguration_counter % self.reconfiguration_freq == 0:
+                stochastic_reconfiguration(walkers, comm, self.timer)
+                self.reconfiguration_counter = 0
+        else:
+            if comm.rank == 0:
+                print("Unknown population control method.")
+
+
+def get_buffer(walkers, iw):
+    """Get iw-th walker buffer for MPI communication
+    iw : int
+        the walker index of interest
+    Returns
+    -------
+    buff : 1D complex array
+        Entries listed in walkers.buff_names, then walkers.stack[iw].get_buffer().
+    """
+    s = 0  # write offset into buff
+    buff = xp.zeros(walkers.buff_size, dtype=numpy.complex128)
+    for d in walkers.buff_names:
+        data = walkers.__dict__[d]
+        if (data is None) or isinstance(
+            data, (int, float, complex, numpy.float64, numpy.complex128)
+        ):
+            continue  # unset attributes and plain scalars are skipped
+        assert data.size % walkers.nwalkers == 0  # Only walker-specific data is being communicated
+        if isinstance(data[iw], (xp.ndarray)):
+            buff[s : s + data[iw].size] = xp.array(data[iw].ravel())
+            s += data[iw].size
+        elif isinstance(data[iw], list):  # when data is list
+            for l in data[iw]:
+                if isinstance(l, (xp.ndarray)):
+                    buff[s : s + l.size] = xp.array(l.ravel())
+                    s += l.size
+                elif isinstance(l, (int, float, complex, numpy.float64, numpy.complex128)):
+                    buff[s : s + 1] = l
+                    s += 1
+        else:
+            buff[s : s + 1] = xp.array(data[iw])  # per-walker scalar takes one slot
+            s += 1
+
+    stack_buff = walkers.stack[iw].get_buffer()
+    buff = numpy.concatenate((buff, stack_buff))  # stack data follows the walker data
+    return buff
+
+
+def set_buffer(walkers, iw, buff):
+    """Set walker buffer following MPI communication
+    Parameters
+    ----------
+    iw : int, index of the walker that is overwritten
+    buff : array holding the walker entries followed by the stack entries
+    """
+    s = 0  # read offset into buff
+    for d in walkers.buff_names:
+        data = walkers.__dict__[d]
+        if (data is None) or isinstance(
+            data, (int, float, complex, numpy.float64, numpy.complex128)
+        ):
+            continue  # same skip rule as get_buffer, so offsets stay aligned
+        assert data.size % walkers.nwalkers == 0  # Only walker-specific data is being communicated
+        if isinstance(data[iw], xp.ndarray):
+            walkers.__dict__[d][iw] = xp.array(
+                buff[s : s + data[iw].size].reshape(data[iw].shape).copy()
+            )
+            s += data[iw].size
+        elif isinstance(data[iw], list):
+            for ix, l in enumerate(data[iw]):
+                if isinstance(l, (xp.ndarray)):
+                    walkers.__dict__[d][iw][ix] = xp.array(
+                        buff[s : s + l.size].reshape(l.shape).copy()
+                    )
+                    s += l.size
+                elif isinstance(l, (int, float, complex)):
+                    walkers.__dict__[d][iw][ix] = buff[s]
+                    s += 1
+        else:
+            if isinstance(walkers.__dict__[d][iw], (int, numpy.int64)):
+                walkers.__dict__[d][iw] = int(buff[s].real)  # integers are restored from the real part
+            elif isinstance(walkers.__dict__[d][iw], (float, numpy.float64)):
+                walkers.__dict__[d][iw] = buff[s].real
+            else:
+                walkers.__dict__[d][iw] = buff[s]
+            s += 1
+
+    walkers.stack[iw].set_buffer(buff[walkers.buff_size :])  # remainder belongs to the stack
+
+
+def comb(walkers, comm, weights, target_weight, timer=PopControllerTimer()):
+    """Apply the comb method of population control / branching.
+
+    See Booth & Gubernatis PRE 80, 046704 (2009).
+
+    Parameters
+    ----------
+    walkers : walker batch; walkers hit by no comb tooth are overwritten by clones
+    comm : MPI communicator
+    weights : rescaled weights of all walkers on all ranks; values read on rank 0 only
+    target_weight : number of comb teeth; passed to range(), so it must be an int
+    timer : PopControllerTimer accumulating the time spent in each phase
+    """
+    # todo : add phase to walker for free projection
+    timer.start_time()
+    if comm.rank == 0:
+        parent_ix = numpy.zeros(len(weights), dtype="i")
+    else:
+        parent_ix = numpy.empty(len(weights), dtype="i")
+    if comm.rank == 0:
+        total_weight = sum(weights)
+        cprobs = numpy.cumsum(weights)
+        r = numpy.random.random()  # one random offset shared by all teeth
+        comb = [(i + r) * (total_weight / target_weight) for i in range(target_weight)]
+        iw = 0
+        ic = 0
+        while ic < len(comb):
+            if comb[ic] < cprobs[iw]:
+                parent_ix[iw] += 1
+                ic += 1
+            else:
+                iw += 1
+        data = {"ix": parent_ix}
+    else:
+        data = None
+
+    timer.add_non_communication()
+
+    timer.start_time()
+    data = comm.bcast(data, root=0)
+    timer.add_communication()
+    timer.start_time()
+    parent_ix = data["ix"]
+    # parent_ix[g] is the number of comb teeth that landed on global walker g.
+    # where returns a tuple (array,), selecting first element.
+    kill = numpy.where(parent_ix == 0)[0]
+    clone = numpy.where(parent_ix > 1)[0]
+    reqs = []
+    # First initiate non-blocking sends of walkers.
+    timer.add_non_communication()
+    timer.start_time()
+    comm.barrier()
+    timer.add_communication()
+    for i, (c, k) in enumerate(zip(clone, kill)):
+        # Sending from current processor?
+        if c // walkers.nwalkers == comm.rank:
+            timer.start_time()
+            # Location of walker to clone in local list.
+            clone_pos = c % walkers.nwalkers
+            # copying walker data to intermediate buffer to avoid issues
+            # with accessing walker data during send. Might not be
+            # necessary.
+            dest_proc = k // walkers.nwalkers
+            buff = get_buffer(walkers, clone_pos)
+            timer.add_non_communication()
+            timer.start_time()
+            reqs.append(comm.Isend(buff, dest=dest_proc, tag=i))
+            timer.add_send_time()
+    # Now receive walkers on processors where walkers are to be killed.
+    for i, (c, k) in enumerate(zip(clone, kill)):
+        # Receiving to current processor?
+        if k // walkers.nwalkers == comm.rank:
+            timer.start_time()
+            # Processor we are receiving from.
+            source_proc = c // walkers.nwalkers
+            # Location of walker to kill in local list of walkers.
+            kill_pos = k % walkers.nwalkers
+            buffer = walkers.walker_buffer  # replaced by the next statement
+            buffer = numpy.concatenate((walkers.walker_buffer, walkers.stack[0].stack_buffer))
+            timer.add_non_communication()
+            timer.start_time()
+            comm.Recv(buffer, source=source_proc, tag=i)
+            # with h5py.File('walkers_recv.h5', 'w') as fh5:
+            # fh5['walk_{}'.format(k)] = walkers.walker_buffer.copy()
+            timer.add_recv_time()
+            timer.start_time()
+            set_buffer(walkers, kill_pos, buffer)
+            timer.add_non_communication()
+            # with h5py.File('after_{}.h5'.format(comm.rank), 'a') as fh5:
+            # fh5['walker_{}_{}_{}'.format(c,k,comm.rank)] = walkers.walkers[kill_pos].get_buffer()
+    timer.start_time()
+    # Complete non-blocking send.
+    for rs in reqs:
+        rs.wait()
+    # Necessary?
+    # if len(kill) > 0 or len(clone) > 0:
+    # sys.exit()
+    comm.Barrier()
+    timer.add_communication()
+    # Reset walker weight.
+    # TODO: check this.
+    # for w in walkers.walkers:
+    # w.weight = 1.0
+    timer.start_time()
+    walkers.weight.fill(1.0)
+    timer.add_non_communication()
+
+
+def pair_branch(walkers, comm, max_weight, min_weight, timer=PopControllerTimer()):
+    """Pair-branch population control: rank 0 pairs the lightest with the heaviest
+    walkers, clones one of each pair with probability proportional to its weight,
+    and the clone is sent to overwrite the killed walker."""
+    timer.start_time()
+    walker_info_0 = xp.array(xp.abs(walkers.weight))
+    timer.add_non_communication()
+
+    timer.start_time()
+    glob_inf = None
+    glob_inf_0 = None
+    glob_inf_1 = None
+    glob_inf_2 = None
+    glob_inf_3 = None
+    if comm.rank == 0:
+        glob_inf_0 = numpy.empty([comm.size, walkers.nwalkers], dtype=numpy.float64)
+        glob_inf_1 = numpy.empty([comm.size, walkers.nwalkers], dtype=numpy.int64)
+        glob_inf_1.fill(1)
+        glob_inf_2 = numpy.array(
+            [[r for i in range(walkers.nwalkers)] for r in range(comm.size)], dtype=numpy.int64
+        )
+        glob_inf_3 = numpy.array(
+            [[r for i in range(walkers.nwalkers)] for r in range(comm.size)], dtype=numpy.int64
+        )
+
+    timer.add_non_communication()
+
+    timer.start_time()
+    if hasattr(walker_info_0, "get"):
+        walker_info_0 = walker_info_0.get()
+    comm.Gather(
+        walker_info_0, glob_inf_0, root=0
+    )  # gather |w_i| from all processors (comm.size x nwalkers)
+    timer.add_communication()
+
+    timer.start_time()
+    if comm.rank == 0:
+        glob_inf = numpy.zeros((walkers.nwalkers * comm.size, 4), dtype=numpy.float64)
+        glob_inf[:, 0] = glob_inf_0.ravel()  # contains walker |w_i|
+        glob_inf[:, 1] = (
+            glob_inf_1.ravel()
+        )  # all initialized to 1 when it becomes 2 then it will be "branched"
+        glob_inf[:, 2] = (
+            glob_inf_2.ravel()
+        )  # contain processor+walker indices (initial) (i.e., where walkers live)
+        glob_inf[:, 3] = (
+            glob_inf_3.ravel()
+        )  # contain processor+walker indices (final) (i.e., where walkers live)
+        sort = numpy.argsort(glob_inf[:, 0], kind="mergesort")
+        isort = numpy.argsort(sort, kind="mergesort")  # inverse permutation, used to undo the sort
+        glob_inf = glob_inf[sort]
+        s = 0
+        e = len(glob_inf) - 1
+        tags = []
+        # go through walkers pair-wise
+        while s < e:
+            if glob_inf[s][0] < min_weight or glob_inf[e][0] > max_weight:
+                # sum of paired walker weights
+                wab = glob_inf[s][0] + glob_inf[e][0]
+                r = numpy.random.rand()
+                if r < glob_inf[e][0] / wab:
+                    # clone large weight walker
+                    glob_inf[e][0] = 0.5 * wab
+                    glob_inf[e][1] = 2
+                    # Processor we will send duplicated walker to
+                    glob_inf[e][3] = glob_inf[s][2]
+                    send = glob_inf[s][2]
+                    # Kill small weight walker
+                    glob_inf[s][0] = 0.0
+                    glob_inf[s][1] = 0
+                    glob_inf[s][3] = glob_inf[e][2]
+                else:
+                    # clone small weight walker
+                    glob_inf[s][0] = 0.5 * wab
+                    glob_inf[s][1] = 2
+                    # Processor we will send duplicated walker to
+                    glob_inf[s][3] = glob_inf[e][2]
+                    send = glob_inf[e][2]
+                    # Kill large weight walker
+                    glob_inf[e][0] = 0.0
+                    glob_inf[e][1] = 0
+                    glob_inf[e][3] = glob_inf[s][2]
+                tags.append([send])  # tags is not read again in this function
+                s += 1
+                e -= 1
+            else:
+                break
+        nw = walkers.nwalkers
+        glob_inf = glob_inf[isort].reshape((comm.size, nw, 4))
+    else:
+        data = None  # reassigned below before it is used
+        glob_inf = None
+    timer.add_non_communication()
+    timer.start_time()
+
+    data = numpy.empty([walkers.nwalkers, 4], dtype=numpy.float64)
+    # 0 = weight, 1 = status (live, branched, die), 2 = initial index, 3 = final index
+    comm.Scatter(glob_inf, data, root=0)
+
+    timer.add_communication()
+    reqs = []
+    for iw, walker in enumerate(data):
+        if walker[1] > 1:
+            timer.start_time()
+            tag = comm.rank * walkers.nwalkers + walker[3]
+            walkers.weight[iw] = walker[0]  # cloned walker keeps half of the pair weight
+            buff = get_buffer(walkers, iw)
+            timer.add_non_communication()
+            timer.start_time()
+            reqs.append(comm.Isend(buff, dest=int(round(walker[3])), tag=tag))
+            timer.add_send_time()
+    for iw, walker in enumerate(data):
+        if walker[1] == 0:
+            timer.start_time()
+            tag = walker[3] * walkers.nwalkers + comm.rank
+            buffer = walkers.walker_buffer  # replaced by the next statement
+            buffer = numpy.concatenate((walkers.walker_buffer, walkers.stack[0].stack_buffer))
+            timer.add_non_communication()
+            timer.start_time()
+            comm.Recv(buffer, source=int(round(walker[3])), tag=tag)
+            timer.add_recv_time()
+            timer.start_time()
+            set_buffer(walkers, iw, buffer)
+            timer.add_non_communication()
+    timer.start_time()
+    for r in reqs:
+        r.wait()
+    timer.add_communication()
+
+
+def stochastic_reconfiguration(walkers, comm, timer=PopControllerTimer()):
+    """Resample all walkers on rank 0 with a single random offset and equal weights.
+
+    Entry 0 of each walker buffer is read as its weight and overwritten with the mean.
+    """
+    timer.start_time()
+    nwalkers = walkers.nwalkers
+    send_buf = xp.array([get_buffer(walkers, iw) for iw in range(nwalkers)])
+    buf_len = send_buf[0].shape[0]
+    gathered = None
+    if comm.rank == 0:
+        gathered = numpy.zeros((comm.size, nwalkers, buf_len), dtype=numpy.complex128)
+    timer.add_non_communication()
+
+    timer.start_time()
+    comm.Gather(send_buf, gathered, root=0)
+    timer.add_communication()
+
+    # Selection happens on the root only; other ranks pass None to Scatter.
+    resampled = None
+    timer.start_time()
+    if comm.rank == 0:
+        resampled = numpy.zeros((comm.size, nwalkers, buf_len), dtype=numpy.complex128)
+        cumulative_weights = numpy.cumsum(abs(gathered[:, :, 0]))
+        total_weight = cumulative_weights[-1]
+        mean_weight = total_weight / nwalkers / comm.size
+        zeta = numpy.random.rand()
+        for slot in range(comm.size * nwalkers):
+            z = (slot + zeta) / nwalkers / comm.size
+            picked = numpy.searchsorted(cumulative_weights, z * total_weight)
+            dst = divmod(slot, nwalkers)
+            resampled[dst] = gathered[divmod(picked, nwalkers)]
+            resampled[dst + (0,)] = mean_weight
+    timer.add_non_communication()
+
+    # Hand each rank its slice of the resampled walkers.
+    timer.start_time()
+    comm.Scatter(resampled, send_buf, root=0)
+    timer.add_communication()
+
+    timer.start_time()
+    for iw in range(nwalkers):
+        set_buffer(walkers, iw, send_buf[iw])
+    timer.add_non_communication()
diff --git a/ipie/addons/thermal/walkers/stack.py b/ipie/addons/thermal/walkers/stack.py
new file mode 100644
index 00000000..4476cd34
--- /dev/null
+++ b/ipie/addons/thermal/walkers/stack.py
@@ -0,0 +1,415 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+# TODO: Incorporate the `stack` buffer in the `walkers` buffer for MPI. See:
+#       https://github.com/JoonhoLee-Group/ipie/issues/301
+
+import numpy
+import scipy.linalg
+
+from ipie.utils.misc import get_numeric_names
+
+
+class PropagatorStack:
+    def __init__(
+        self,
+        stack_size,
+        nslice,
+        nbasis,
+        dtype,
+        BT=None,
+        BTinv=None,
+        diagonal=False,
+        averaging=False,
+        lowrank=True,
+        thresh=1e-6,
+    ):
+        """Allocate the propagator stack, optional low-rank factors and the MPI buffer."""
+        self.time_slice = 0
+        self.stack_size = stack_size
+        self.nslice = nslice
+        self.nstack = self.nslice // self.stack_size  # number of bins of stack_size slices
+        self.nbasis = nbasis
+        self.diagonal_trial = diagonal
+        self.averaging = averaging
+        self.thresh = thresh
+        self.lowrank = lowrank
+        self.ovlp = numpy.asarray([1.0, 1.0])
+        self.reortho = 1
+
+        if self.lowrank:
+            assert diagonal  # the low-rank path is only set up for a diagonal trial
+
+        if self.nstack * self.stack_size < self.nslice:
+            print("stack_size must divide the total path length")
+            assert self.nstack * self.stack_size == self.nslice
+
+        self.dtype = dtype
+        self.BT = BT
+        self.BTinv = BTinv
+        self.counter = 0  # position inside the current bin
+        self.block = 0  # index of the current bin
+
+        self.stack = numpy.zeros((self.nstack, 2, nbasis, nbasis), dtype=dtype)
+        self.left = numpy.zeros((self.nstack, 2, nbasis, nbasis), dtype=dtype)
+        self.right = numpy.zeros((self.nstack, 2, nbasis, nbasis), dtype=dtype)
+
+        self.G = numpy.asarray(
+            [numpy.eye(self.nbasis, dtype=dtype), numpy.eye(self.nbasis, dtype=dtype)]  # [Ga, Gb]
+        )
+
+        if self.lowrank:
+            self.update_new = self.update_low_rank
+        else:
+            self.update_new = self.update_full_rank
+
+        if self.lowrank:  # Global block matrix
+            self.Ql = numpy.zeros((2, nbasis, nbasis), dtype=dtype)
+            self.Dl = numpy.zeros((2, nbasis), dtype=dtype)
+            self.Tl = numpy.zeros((2, nbasis, nbasis), dtype=dtype)
+
+            self.Qr = numpy.zeros((2, nbasis, nbasis), dtype=dtype)
+            self.Dr = numpy.zeros((2, nbasis), dtype=dtype)
+            self.Tr = numpy.zeros((2, nbasis, nbasis), dtype=dtype)
+
+            self.CT = numpy.zeros((2, nbasis, nbasis), dtype=dtype)
+            self.theta = numpy.zeros((2, nbasis, nbasis), dtype=dtype)
+            self.mT = nbasis
+
+        self.buff_names, self.buff_size = get_numeric_names(self.__dict__)  # attributes set so far
+        self.stack_buffer = numpy.zeros(self.buff_size, dtype=numpy.complex128)
+
+        # Set all entries to be the identity matrix
+        self.reset()
+
+    def get(self, ix):
+        return self.stack[ix]  # both spin blocks stored for bin ix
+
+    def get_buffer(self):
+        """Pack the attributes named in self.buff_names into one complex128 vector."""
+        packed = numpy.zeros(self.buff_size, dtype=numpy.complex128)
+        offset = 0
+        for name in self.buff_names:
+            value = self.__dict__[name]
+            is_array = isinstance(value, numpy.ndarray)
+            # Arrays are flattened into the vector; anything else fills one slot.
+            width = value.size if is_array else 1
+            packed[offset : offset + width] = value.ravel() if is_array else value
+            offset += width
+        return packed
+
+    def set_buffer(self, buff):
+        """Restore the attributes named in self.buff_names from buff, in order."""
+        pos = 0
+        for name in self.buff_names:
+            old = self.__dict__[name]
+            if isinstance(old, numpy.ndarray):
+                self.__dict__[name] = buff[pos : pos + old.size].reshape(old.shape).copy()
+                pos += old.size
+                continue
+            entry = buff[pos]
+            if isinstance(old, int):
+                entry = int(entry.real)
+            elif isinstance(old, float):
+                entry = float(entry.real)
+            self.__dict__[name] = entry
+            pos += 1
+
+    def set_all(self, BT):
+        """Multiply BT into self.left once per time slice and mirror it into self.stack.
+
+        Slice i updates bin i // stack_size, so every bin receives stack_size
+        factors of BT. With diagonal_trial only the diagonals are multiplied.
+        With lowrank the left factors are rebuilt and Qr, Dr, Tr are reset.
+        """
+        for i in range(0, self.nslice):
+            ix = i // self.stack_size  # bin index
+            for spin in [0, 1]:
+                if self.diagonal_trial:
+                    # Both BT and left are assumed diagonal here.
+                    self.left[ix, spin] = numpy.diag(
+                        numpy.multiply(BT[spin].diagonal(), self.left[ix, spin].diagonal())
+                    )
+                else:
+                    self.left[ix, spin] = numpy.dot(BT[spin], self.left[ix, spin])
+            for spin in [0, 1]:
+                self.stack[ix, spin] = self.left[ix, spin].copy()
+
+        # Low-rank bookkeeping: rebuild the left factors, reset the right ones.
+        if self.lowrank:
+            self.initialize_left()
+            identity = numpy.identity(self.nbasis, dtype=self.dtype)
+            for s in [0, 1]:
+                self.Qr[s] = identity
+                self.Dr[s] = numpy.ones(self.nbasis, dtype=self.dtype)
+                self.Tr[s] = identity
+
+    def reset(self):
+        """Rewind to time slice 0 and set stack, right and left to identities.
+
+        With lowrank, Qr and Tr become identities and Dr becomes all ones.
+        """
+        self.time_slice = 0
+        self.block = 0
+        eye = numpy.identity(self.nbasis, dtype=self.dtype)
+        for i in range(0, self.nstack):
+            for spin in [0, 1]:
+                self.stack[i, spin] = eye
+                self.right[i, spin] = eye
+                self.left[i, spin] = eye
+        if self.lowrank:
+            self.Qr[:], self.Tr[:] = eye, eye
+            self.Dr[:] = numpy.ones(self.nbasis, dtype=self.dtype)
+
+    # Form BT product for i = 1, ..., nslices - 1 (i.e., skip i = 0)
+    # \TODO add non-diagonal version of this
+    def initialize_left(self):
+        """Set Dl to the product of the diagonals of every stack bin; Ql = Tl = identity.
+
+        Only valid for diagonal propagators, hence the diagonal_trial assertion.
+        """
+        assert self.diagonal_trial
+        for spin in [0, 1]:
+            first = self.stack[0][spin]
+            self.Dl[spin] = first.diagonal()
+            self.Ql[spin] = numpy.identity(first.shape[0])
+            self.Tl[spin] = numpy.identity(first.shape[0])
+            # Fold in the remaining bins (ix = 1 .. nstack - 1) one at a time.
+            for ix in range(1, self.nstack):
+                updated = numpy.einsum("ii,i->i", self.stack[ix][spin], self.Dl[spin])
+                self.Dl[spin] = updated
+
+    def update(self, B):
+        """Left-multiply the current bin by B; a new bin starts from the identity."""
+        for spin in [0, 1]:
+            if self.counter == 0:
+                self.stack[self.block, spin] = numpy.identity(B.shape[-1], dtype=B.dtype)
+            self.stack[self.block, spin] = B[spin].dot(self.stack[self.block, spin])
+        self.time_slice += 1
+        self.block = self.time_slice // self.stack_size
+        self.counter = (self.counter + 1) % self.stack_size
+
+    def update_full_rank(self, B):
+        """Apply one propagator ``B`` to the full-rank left/right/stack matrices.
+
+        For each spin of the current block:
+
+        * ``right`` is reset to the identity when ``self.counter`` is zero and is
+          then left-multiplied by ``B[spin]``;
+        * ``left`` is right-multiplied by ``self.BTinv[spin]`` (only the diagonals
+          are multiplied when ``self.diagonal_trial`` is set);
+        * ``stack`` is rebuilt as ``left`` times ``right``.
+
+        The time slice, block index and within-block counter are advanced last.
+        """
+        blk = self.block
+        for spin in (0, 1):
+            if self.counter == 0:
+                self.right[blk, spin] = numpy.identity(B.shape[-1], dtype=B.dtype)
+
+            if self.diagonal_trial:
+                # Both factors are treated as diagonal: multiply the diagonals only.
+                scaled = self.left[blk, spin].diagonal() * self.BTinv[spin].diagonal()
+                self.left[blk, spin] = numpy.diag(scaled)
+            else:
+                self.left[blk, spin] = self.left[blk, spin].dot(self.BTinv[spin])
+
+            self.right[blk, spin] = B[spin].dot(self.right[blk, spin])
+
+            if self.diagonal_trial:
+                # Row-scale ``right`` by the diagonal of ``left``.
+                self.stack[blk, spin] = numpy.einsum(
+                    "ii,ij->ij", self.left[blk, spin], self.right[blk, spin]
+                )
+            else:
+                self.stack[blk, spin] = self.left[blk, spin].dot(self.right[blk, spin])
+
+        self.time_slice += 1  # one more slice has been applied
+        self.counter = (self.counter + 1) % self.stack_size  # position within the block
+        self.block = self.time_slice // self.stack_size  # block the next slice belongs to
+
+    def update_low_rank(self, B):
+        """Apply one propagator ``B`` using the truncated (low-rank) factorization.
+
+        Requires ``self.averaging`` to be false and ``self.diagonal_trial`` to be true.
+
+        For each spin ``s`` the method
+
+        * scales ``self.Dl[s]`` element-wise by the diagonal of ``self.BTinv[s]``;
+        * left-multiplies the retained columns of ``self.Qr[s]`` by ``B[s]`` and zeroes
+          the remaining columns;
+        * takes a column-pivoted QR of the combined left/right product and counts the
+          diagonal entries of ``R`` whose magnitude exceeds ``self.thresh`` (``mT``);
+        * rebuilds ``self.ovlp[s]``, ``self.CT[s]``, ``self.theta[s]`` and ``self.G[s]``.
+
+        When this slice is the last one of the current block (``next_block >
+        self.block``) the right factorization ``Qr``/``Dr``/``Tr`` is additionally
+        refreshed through a pivoted QR of ``Qr * Dr``; otherwise ``Dr`` and ``Tr`` are
+        left as they were.
+
+        Finally ``self.mT`` is set to the ``mT`` of the last spin processed and the
+        time slice, block index and within-block counter are advanced.
+        """
+        assert not self.averaging
+        # Diagonal = True assumes BT is diagonal and left is also diagonal
+        assert self.diagonal_trial
+
+        # At the start of a block, take Tl from the stored left matrices of that block.
+        if self.counter == 0:
+            for s in [0, 1]:
+                self.Tl[s] = self.left[self.block, s]
+
+        # Default ranks (full size); each is overwritten inside the spin loops below.
+        mR = B.shape[-1]  # initial mR
+        mL = B.shape[-1]  # initial mL
+        mT = B.shape[-1]  # initial mT
+        next_block = (self.time_slice + 1) // self.stack_size  # move to the next block if necessary
+        # print("next_block", next_block)
+        # print("self.block", self.block)
+        if next_block > self.block:  # Do QR and update here?
+            for s in [0, 1]:
+                # mR / mL count the entries above the threshold and are then used as
+                # leading-slice sizes -- presumably the large entries come first
+                # (TODO confirm).
+                mR = len(self.Dr[s][numpy.abs(self.Dr[s]) > self.thresh])
+                self.Dl[s] = numpy.einsum("i,ii->i", self.Dl[s], self.BTinv[s])
+                mL = len(self.Dl[s][numpy.abs(self.Dl[s]) > self.thresh])
+
+                self.Qr[s][:, :mR] = B[s].dot(self.Qr[s][:, :mR])  # N x mR
+                self.Qr[s][:, mR:] = 0.0
+
+                # Refresh the right factorization: pivoted QR of Qr * Dr.
+                Ccr = numpy.einsum("ij,j->ij", self.Qr[s][:, :mR], self.Dr[s][:mR])  # N x mR
+                (Qlcr, Rlcr, Plcr) = scipy.linalg.qr(Ccr, pivoting=True, check_finite=False)
+                Dlcr = Rlcr[:mR, :mR].diagonal()  # mR
+
+                self.Dr[s][:mR] = Dlcr
+                self.Dr[s][mR:] = 0.0
+                self.Qr[s] = Qlcr
+
+                Dinv = 1.0 / Dlcr  # mR
+                tmp = numpy.einsum("i,ij->ij", Dinv[:mR], Rlcr[:mR, :mR])  # mR, mR x mR -> mR x mR
+                # Undo the column pivoting of the QR.
+                tmp[:, Plcr] = tmp[:, range(mR)]
+                Tlcr = numpy.dot(tmp, self.Tr[s][:mR, :])  # mR x N
+
+                self.Tr[s][:mR, :] = Tlcr
+
+                # assume left stack is all diagonal (i.e., QDT = diagonal -> Q and T are identity)
+                Clcr = numpy.einsum(
+                    "i,ij->ij",
+                    self.Dl[s][:mL],
+                    numpy.einsum("ij,j->ij", Qlcr[:mL, :mR], Dlcr[:mR]),
+                )  # mL x mR
+
+                (Qlcr, Rlcr, Plcr) = scipy.linalg.qr(
+                    Clcr, pivoting=True, check_finite=False
+                )  # mL x mL, min(mL,mR) x min(mL,mR), mR x mR
+                Dlcr = Rlcr.diagonal()[: min(mL, mR)]
+                Dinv = 1.0 / Dlcr
+
+                # Rank kept after truncating the combined left/right product.
+                mT = len(Dlcr[numpy.abs(Dlcr) > self.thresh])
+
+                assert mT <= mL and mT <= mR
+
+                tmp = numpy.einsum("i,ij->ij", Dinv[:mT], Rlcr[:mT, :])
+                tmp[:, Plcr] = tmp[:, range(mR)]  # mT x mR
+                Tlcr = numpy.dot(tmp, Tlcr)  # mT x N
+
+                # Split each retained diagonal entry d into Db and Ds:
+                # |d| > 1 -> Db = 1/|d|, Ds = sign(d); otherwise Db = 1, Ds = d.
+                Db = numpy.zeros(mT, B[s].dtype)
+                Ds = numpy.zeros(mT, B[s].dtype)
+                for i in range(mT):
+                    absDlcr = abs(Dlcr[i])
+                    if absDlcr > 1.0:
+                        Db[i] = 1.0 / absDlcr
+                        Ds[i] = numpy.sign(Dlcr[i])
+                    else:
+                        Db[i] = 1.0
+                        Ds[i] = Dlcr[i]
+                Dbinv = 1.0 / Db
+
+                TQ = Tlcr[:, :mL].dot(Qlcr[:mL, :mT])  # mT x mT
+                TQinv = scipy.linalg.inv(TQ, check_finite=False)
+                tmp = numpy.einsum("ij,j->ij", TQinv, Db) + numpy.diag(Ds)  # mT x mT
+
+                M = numpy.einsum("ij,j->ij", tmp, Dbinv).dot(TQ)
+                # self.ovlp[s] = 1.0 / scipy.linalg.det(M, check_finite=False)
+                self.ovlp[s] = scipy.linalg.det(M, check_finite=False)
+
+                tmp = scipy.linalg.inv(tmp, check_finite=False)
+                A = numpy.einsum("i,ij->ij", Db, tmp.dot(TQinv))  # mT x mT
+                # Zero-pad Q back up to nbasis x nbasis.
+                Qlcr_pad = numpy.zeros((self.nbasis, self.nbasis), dtype=B[s].dtype)
+                Qlcr_pad[:mL, :mT] = Qlcr[:, :mT]
+
+                # self.G[s] = numpy.eye(self.nbasis, dtype=B[s].dtype) - Qlcr_pad[:,:mT].dot(numpy.diag(Dlcr[:mT])).dot(A).dot(Tlcr)
+
+                # Store the two factors (only the first mT columns / rows are non-zero)
+                # and form G = I - theta^T . CT^H from them.
+                self.CT[s][:, :] = 0.0
+                self.CT[s][:, :mT] = (A.dot(Tlcr)).T.conj()
+                self.theta[s][:, :] = 0.0
+                self.theta[s][:mT, :] = Qlcr_pad[:, :mT].dot(numpy.diag(Dlcr[:mT])).T
+                # self.G[s] = numpy.eye(self.nbasis, dtype=B[s].dtype) - self.CT[s][:,:mT].dot(self.theta[s][:mT,:])
+                self.G[s] = numpy.eye(self.nbasis, dtype=B[s].dtype) - self.theta[s][:mT, :].T.dot(
+                    self.CT[s][:, :mT].T.conj()
+                )
+                # self.CT[s][:,:mT] = self.CT[s][:,:mT].conj()
+
+                # print("# mL, mR, mT = {}, {}, {}".format(mL, mR, mT))
+        else:  # don't do QR and just update
+            for s in [0, 1]:
+                mR = len(self.Dr[s][numpy.abs(self.Dr[s]) > self.thresh])
+
+                self.Dl[s] = numpy.einsum("i,ii->i", self.Dl[s], self.BTinv[s])
+                mL = len(self.Dl[s][numpy.abs(self.Dl[s]) > self.thresh])
+
+                self.Qr[s][:, :mR] = B[s].dot(self.Qr[s][:, :mR])  # N x mR
+                self.Qr[s][:, mR:] = 0.0
+
+                # Unlike the branch above, Dr and Tr are not refreshed here; the
+                # combined product is built directly from the current Qr and Dr.
+                Ccr = numpy.einsum("ij,j->ij", self.Qr[s][:, :mR], self.Dr[s][:mR])  # N x mR
+                Clcr = numpy.einsum("i,ij->ij", self.Dl[s][:mL], Ccr[:mL, :mR])  # mL x mR
+
+                (Qlcr, Rlcr, Plcr) = scipy.linalg.qr(
+                    Clcr, pivoting=True, check_finite=False
+                )  # mL x mL, min(mL,mR) x min(mL,mR), mR x mR
+                Dlcr = Rlcr.diagonal()[: min(mL, mR)]
+                Dinv = 1.0 / Dlcr
+
+                # Rank kept after truncating the combined left/right product.
+                mT = len(Dlcr[numpy.abs(Dlcr) > self.thresh])
+
+                assert mT <= mL and mT <= mR
+
+                tmp = numpy.einsum("i,ij->ij", Dinv[:mT], Rlcr[:mT, :])
+                # Undo the column pivoting of the QR.
+                tmp[:, Plcr] = tmp[:, range(mR)]  # mT x mR
+                Tlcr = numpy.dot(tmp, self.Tr[s][:mR, :])  # mT x N
+
+                # Same Db / Ds split of the retained diagonal as in the branch above.
+                Db = numpy.zeros(mT, B[s].dtype)
+                Ds = numpy.zeros(mT, B[s].dtype)
+                for i in range(mT):
+                    absDlcr = abs(Dlcr[i])
+                    if absDlcr > 1.0:
+                        Db[i] = 1.0 / absDlcr
+                        Ds[i] = numpy.sign(Dlcr[i])
+                    else:
+                        Db[i] = 1.0
+                        Ds[i] = Dlcr[i]
+                Dbinv = 1.0 / Db
+
+                TQ = Tlcr[:, :mL].dot(Qlcr[:mL, :mT])  # mT x mT
+                TQinv = scipy.linalg.inv(TQ, check_finite=False)
+                tmp = numpy.einsum("ij,j->ij", TQinv, Db) + numpy.diag(Ds)  # mT x mT
+
+                M = numpy.einsum("ij,j->ij", tmp, Dbinv).dot(TQ)
+                # self.ovlp[s] = 1.0 / scipy.linalg.det(M, check_finite=False)
+                self.ovlp[s] = scipy.linalg.det(M, check_finite=False)
+
+                tmp = scipy.linalg.inv(tmp, check_finite=False)
+                A = numpy.einsum("i,ij->ij", Db, tmp.dot(TQinv))  # mT x mT
+                # Zero-pad Q back up to nbasis x nbasis.
+                Qlcr_pad = numpy.zeros((self.nbasis, self.nbasis), dtype=B[s].dtype)
+                Qlcr_pad[:mL, :mT] = Qlcr[:, :mT]
+
+                # self.CT[s][:,:] = 0.0
+                # self.CT[s][:,:mT] = Qlcr_pad[:,:mT].dot(numpy.diag(Dlcr[:mT]))
+                # self.theta[s][:,:] = 0.0
+                # self.theta[s][:mT,:] = A.dot(Tlcr)
+                # self.G[s] = numpy.eye(self.nbasis, dtype=B[s].dtype) - self.CT[s][:,:mT].dot(self.theta[s][:mT,:])
+                # self.CT[s][:,:mT] = self.CT[s][:,:mT].conj()
+                # Store the two factors and form G = I - theta^T . CT^H from them.
+                self.CT[s][:, :] = 0.0
+                self.CT[s][:, :mT] = (A.dot(Tlcr)).T.conj()
+                self.theta[s][:, :] = 0.0
+                self.theta[s][:mT, :] = Qlcr_pad[:, :mT].dot(numpy.diag(Dlcr[:mT])).T
+                # self.G[s] = numpy.eye(self.nbasis, dtype=B[s].dtype) - self.CT[s][:,:mT].dot(self.theta[s][:mT,:])
+                self.G[s] = numpy.eye(self.nbasis, dtype=B[s].dtype) - self.theta[s][:mT, :].T.dot(
+                    self.CT[s][:, :mT].T.conj()
+                )
+
+            # self.CT = numpy.zeros(shape=(2, nbasis, nbasis),dtype=dtype)
+            # self.theta = numpy.zeros(shape=(2, nbasis, nbasis),dtype=dtype)
+        # print("# mL, mR, mT = {}, {}, {}".format(mL, mR, mT))
+
+        # print("ovlp = {}".format(self.ovlp))
+        # mT holds the value from the last spin processed (s = 1).
+        self.mT = mT
+        self.time_slice += 1  # Count the time slice
+        self.block = self.time_slice // self.stack_size  # move to the next block if necessary
+        self.counter = (self.counter + 1) % self.stack_size  # Counting within a stack
diff --git a/ipie/addons/thermal/walkers/tests/__init__.py b/ipie/addons/thermal/walkers/tests/__init__.py
new file mode 100644
index 00000000..e2aed039
--- /dev/null
+++ b/ipie/addons/thermal/walkers/tests/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ipie/addons/thermal/walkers/tests/test_population_control.py b/ipie/addons/thermal/walkers/tests/test_population_control.py
new file mode 100644
index 00000000..2dd6edf8
--- /dev/null
+++ b/ipie/addons/thermal/walkers/tests/test_population_control.py
@@ -0,0 +1,483 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+import pytest
+from typing import Union
+
+try:
+    from ipie.addons.thermal.utils.legacy_testing import build_legacy_generic_test_case_handlers_mpi
+    from ipie.addons.thermal.utils.legacy_testing import legacy_propagate_walkers
+
+    _no_cython = False
+
+except ModuleNotFoundError:
+    _no_cython = True
+
+from ipie.config import MPI
+from ipie.utils.mpi import MPIHandler
+from ipie.addons.thermal.walkers.pop_controller import ThermalPopController
+from ipie.addons.thermal.utils.testing import build_generic_test_case_handlers_mpi
+
+comm = MPI.COMM_WORLD
+
+
+@pytest.mark.skipif(_no_cython, reason="Need to build cython modules.")
+@pytest.mark.unit
+def test_pair_branch_batch():
+    """Pair-branch population control must reproduce the legacy walkers.
+
+    The new and the legacy full-rank walkers are propagated with the same
+    auxiliary fields (``propagator.xi``) for a few blocks, population control is
+    applied to both after every slice but the first, and the Green's functions
+    and weights are compared at the end.
+    """
+    mpi_handler = MPIHandler()
+
+    # System size.
+    nelec = (5, 5)  # (nup, ndown)
+    nbasis = 10
+
+    # Thermal AFQMC settings.
+    mu = -10.0
+    beta = 0.1
+    timestep = 0.01
+    nwalkers = 12
+    nblocks = 3
+    # Must stay at 1 for thermal AFQMC -- the legacy code overrides any other input.
+    nsteps_per_block = 1
+    pop_control_method = "pair_branch"
+    lowrank = False
+
+    mf_trial = True
+    complex_integrals = False
+    debug = True
+    verbose = comm.rank == 0  # only the root rank prints
+    seed = 7
+    numpy.random.seed(seed)
+
+    # Keyword arguments shared by the new and the legacy builders.
+    shared = {
+        "nwalkers": nwalkers,
+        "lowrank": lowrank,
+        "mf_trial": mf_trial,
+        "seed": seed,
+        "verbose": verbose,
+    }
+
+    # Objects under test.
+    objs = build_generic_test_case_handlers_mpi(
+        nelec,
+        nbasis,
+        mu,
+        beta,
+        timestep,
+        mpi_handler,
+        complex_integrals=complex_integrals,
+        debug=debug,
+        **shared,
+    )
+    trial, hamiltonian = objs["trial"], objs["hamiltonian"]
+    walkers, propagator = objs["walkers"], objs["propagator"]
+    pcontrol = ThermalPopController(
+        nwalkers, nsteps_per_block, mpi_handler, pop_control_method, verbose=verbose
+    )
+
+    # Legacy reference objects, built on the same Hamiltonian.
+    legacy_objs = build_legacy_generic_test_case_handlers_mpi(
+        hamiltonian, mpi_handler, nelec, mu, beta, timestep, **shared
+    )
+    legacy_system = legacy_objs["system"]
+    legacy_trial = legacy_objs["trial"]
+    legacy_hamiltonian = legacy_objs["hamiltonian"]
+    legacy_walkers = legacy_objs["walkers"]
+    legacy_propagator = legacy_objs["propagator"]
+
+    for _ in range(nblocks):
+        for islice in range(walkers.stack[0].nslice):
+            propagator.propagate_walkers(walkers, hamiltonian, trial, debug=True)
+            # Feed the legacy propagation the fields the new propagator just used.
+            legacy_walkers = legacy_propagate_walkers(
+                legacy_hamiltonian,
+                legacy_trial,
+                legacy_walkers,
+                legacy_propagator,
+                xi=propagator.xi,
+            )
+
+            if islice > 0:
+                pcontrol.pop_control(walkers, mpi_handler.comm)
+                legacy_walkers.pop_control(mpi_handler.comm)
+
+        # Reset stack, weights and phase before the next block.
+        walkers.reset(trial)
+        legacy_walkers.reset(legacy_trial)
+
+    for iw in range(walkers.nwalkers):
+        reference = legacy_walkers.walkers[iw]
+        assert numpy.allclose(walkers.Ga[iw], reference.G[0])
+        assert numpy.allclose(walkers.Gb[iw], reference.G[1])
+        assert numpy.allclose(walkers.weight[iw], reference.weight)
+        assert numpy.allclose(walkers.unscaled_weight[iw], reference.unscaled_weight)
+
+
+# TODO: Lowrank code is WIP. See: https://github.com/JoonhoLee-Group/ipie/issues/302
+# @pytest.mark.skipif(_no_cython, reason="Need to build cython modules.")
+# @pytest.mark.unit
+def test_pair_branch_batch_lowrank():
+    """Pair-branch population control with the low-rank walkers (work in progress).
+
+    Mirrors ``test_pair_branch_batch`` but builds the walkers with
+    ``lowrank=True`` and a diagonal, non-mean-field trial. The new and legacy
+    walkers are propagated with the same auxiliary fields and compared at the end.
+    """
+    mpi_handler = MPIHandler()
+
+    # System params.
+    nup = 5
+    ndown = 5
+    nelec = (nup, ndown)
+    nbasis = 10
+
+    # Thermal AFQMC params.
+    mu = -10.0
+    beta = 0.1
+    timestep = 0.01
+    nwalkers = 12
+    nblocks = 3
+    # Must be fixed at 1 for Thermal AFQMC--legacy code overrides whatever input!
+    nsteps_per_block = 1
+    pop_control_method = "pair_branch"
+    lowrank = True
+
+    mf_trial = False
+    diagonal = True
+    complex_integrals = False
+    debug = True
+    verbose = comm.rank == 0  # only the root rank prints
+    seed = 7
+    numpy.random.seed(seed)
+
+    # Test.
+    objs = build_generic_test_case_handlers_mpi(
+        nelec,
+        nbasis,
+        mu,
+        beta,
+        timestep,
+        mpi_handler,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        complex_integrals=complex_integrals,
+        diagonal=diagonal,
+        debug=debug,
+        seed=seed,
+        verbose=verbose,
+    )
+    trial = objs["trial"]
+    hamiltonian = objs["hamiltonian"]
+    walkers = objs["walkers"]
+    propagator = objs["propagator"]
+    pcontrol = ThermalPopController(
+        nwalkers,
+        nsteps_per_block,
+        mpi_handler,
+        pop_control_method=pop_control_method,
+        verbose=verbose,
+    )
+
+    # Legacy.
+    legacy_objs = build_legacy_generic_test_case_handlers_mpi(
+        hamiltonian,
+        mpi_handler,
+        nelec,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        seed=seed,
+        verbose=verbose,
+    )
+    legacy_system = legacy_objs["system"]
+    legacy_trial = legacy_objs["trial"]
+    legacy_hamiltonian = legacy_objs["hamiltonian"]
+    legacy_walkers = legacy_objs["walkers"]
+    legacy_propagator = legacy_objs["propagator"]
+
+    for block in range(nblocks):
+        for t in range(walkers.stack[0].nslice):
+            propagator.propagate_walkers(walkers, hamiltonian, trial, debug=True)
+            # Reuse the auxiliary fields drawn by the new propagator.
+            legacy_walkers = legacy_propagate_walkers(
+                legacy_hamiltonian,
+                legacy_trial,
+                legacy_walkers,
+                legacy_propagator,
+                xi=propagator.xi,
+            )
+
+            if t > 0:
+                pcontrol.pop_control(walkers, mpi_handler.comm)
+                legacy_walkers.pop_control(mpi_handler.comm)
+
+        walkers.reset(trial)  # Reset stack, weights, phase.
+        legacy_walkers.reset(legacy_trial)
+
+    for iw in range(walkers.nwalkers):
+        assert numpy.allclose(walkers.Ga[iw], legacy_walkers.walkers[iw].G[0])
+        assert numpy.allclose(walkers.Gb[iw], legacy_walkers.walkers[iw].G[1])
+        assert numpy.allclose(walkers.weight[iw], legacy_walkers.walkers[iw].weight)
+        assert numpy.allclose(
+            walkers.unscaled_weight[iw], legacy_walkers.walkers[iw].unscaled_weight
+        )
+
+
+@pytest.mark.skipif(_no_cython, reason="Need to build cython modules.")
+@pytest.mark.unit
+def test_comb_batch():
+    """Comb population control must reproduce the legacy walkers.
+
+    The new and the legacy full-rank walkers are propagated with the same
+    auxiliary fields (``propagator.xi``) for a few blocks, "comb" population
+    control is applied to both after every slice but the first, and the Green's
+    functions and weights are compared at the end.
+    """
+    mpi_handler = MPIHandler()
+
+    # System size.
+    nelec = (5, 5)  # (nup, ndown)
+    nbasis = 10
+
+    # Thermal AFQMC settings.
+    mu = -10.0
+    beta = 0.1
+    timestep = 0.01
+    nwalkers = 12
+    nblocks = 3
+    # Must stay at 1 for thermal AFQMC -- the legacy code overrides any other input.
+    nsteps_per_block = 1
+    pop_control_method = "comb"
+    lowrank = False
+
+    mf_trial = True
+    complex_integrals = False
+    debug = True
+    verbose = comm.rank == 0  # only the root rank prints
+    seed = 7
+    numpy.random.seed(seed)
+
+    # Keyword arguments shared by the new and the legacy builders.
+    shared = {
+        "nwalkers": nwalkers,
+        "lowrank": lowrank,
+        "mf_trial": mf_trial,
+        "seed": seed,
+        "verbose": verbose,
+    }
+
+    # Objects under test.
+    objs = build_generic_test_case_handlers_mpi(
+        nelec,
+        nbasis,
+        mu,
+        beta,
+        timestep,
+        mpi_handler,
+        complex_integrals=complex_integrals,
+        debug=debug,
+        **shared,
+    )
+    trial, hamiltonian = objs["trial"], objs["hamiltonian"]
+    walkers, propagator = objs["walkers"], objs["propagator"]
+    pcontrol = ThermalPopController(
+        nwalkers,
+        nsteps_per_block,
+        mpi_handler,
+        pop_control_method=pop_control_method,
+        verbose=verbose,
+    )
+
+    # Legacy reference objects, built on the same Hamiltonian and pop-control method.
+    legacy_objs = build_legacy_generic_test_case_handlers_mpi(
+        hamiltonian,
+        mpi_handler,
+        nelec,
+        mu,
+        beta,
+        timestep,
+        pop_control_method=pop_control_method,
+        **shared,
+    )
+    legacy_system = legacy_objs["system"]
+    legacy_trial = legacy_objs["trial"]
+    legacy_hamiltonian = legacy_objs["hamiltonian"]
+    legacy_walkers = legacy_objs["walkers"]
+    legacy_propagator = legacy_objs["propagator"]
+
+    for _ in range(nblocks):
+        for islice in range(walkers.stack[0].nslice):
+            propagator.propagate_walkers(walkers, hamiltonian, trial, debug=True)
+            # Feed the legacy propagation the fields the new propagator just used.
+            legacy_walkers = legacy_propagate_walkers(
+                legacy_hamiltonian,
+                legacy_trial,
+                legacy_walkers,
+                legacy_propagator,
+                xi=propagator.xi,
+            )
+
+            if islice > 0:
+                pcontrol.pop_control(walkers, mpi_handler.comm)
+                legacy_walkers.pop_control(mpi_handler.comm)
+
+        # Reset stack, weights and phase before the next block.
+        walkers.reset(trial)
+        legacy_walkers.reset(legacy_trial)
+
+    for iw in range(walkers.nwalkers):
+        reference = legacy_walkers.walkers[iw]
+        assert numpy.allclose(walkers.Ga[iw], reference.G[0])
+        assert numpy.allclose(walkers.Gb[iw], reference.G[1])
+        assert numpy.allclose(walkers.weight[iw], reference.weight)
+        assert numpy.allclose(walkers.unscaled_weight[iw], reference.unscaled_weight)
+
+
+# TODO: Lowrank code is WIP. See: https://github.com/JoonhoLee-Group/ipie/issues/302
+# @pytest.mark.skipif(_no_cython, reason="Need to build cython modules.")
+# @pytest.mark.unit
+def test_comb_batch_lowrank():
+    """Comb population control with the low-rank walkers (work in progress).
+
+    Mirrors ``test_comb_batch`` but builds the walkers with ``lowrank=True`` and
+    a diagonal, non-mean-field trial. The new and legacy walkers are propagated
+    with the same auxiliary fields and compared at the end.
+    """
+    mpi_handler = MPIHandler()
+
+    # System params.
+    nup = 5
+    ndown = 5
+    nelec = (nup, ndown)
+    nbasis = 10
+
+    # Thermal AFQMC params.
+    mu = -10.0
+    beta = 0.1
+    timestep = 0.01
+    nwalkers = 12
+    nblocks = 3
+    # Must be fixed at 1 for Thermal AFQMC--legacy code overrides whatever input!
+    nsteps_per_block = 1
+    pop_control_method = "comb"
+    lowrank = True
+
+    mf_trial = False
+    diagonal = True
+    complex_integrals = False
+    debug = True
+    verbose = comm.rank == 0  # only the root rank prints
+    seed = 7
+    numpy.random.seed(seed)
+
+    # Test.
+    objs = build_generic_test_case_handlers_mpi(
+        nelec,
+        nbasis,
+        mu,
+        beta,
+        timestep,
+        mpi_handler,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        complex_integrals=complex_integrals,
+        diagonal=diagonal,
+        debug=debug,
+        seed=seed,
+        verbose=verbose,
+    )
+    trial = objs["trial"]
+    hamiltonian = objs["hamiltonian"]
+    walkers = objs["walkers"]
+    propagator = objs["propagator"]
+    pcontrol = ThermalPopController(
+        nwalkers,
+        nsteps_per_block,
+        mpi_handler,
+        pop_control_method=pop_control_method,
+        verbose=verbose,
+    )
+
+    # Legacy. The population-control method is forwarded, as in test_comb_batch,
+    # so that both sides of the comparison use "comb".
+    legacy_objs = build_legacy_generic_test_case_handlers_mpi(
+        hamiltonian,
+        mpi_handler,
+        nelec,
+        mu,
+        beta,
+        timestep,
+        nwalkers=nwalkers,
+        lowrank=lowrank,
+        mf_trial=mf_trial,
+        pop_control_method=pop_control_method,
+        seed=seed,
+        verbose=verbose,
+    )
+    legacy_system = legacy_objs["system"]
+    legacy_trial = legacy_objs["trial"]
+    legacy_hamiltonian = legacy_objs["hamiltonian"]
+    legacy_walkers = legacy_objs["walkers"]
+    legacy_propagator = legacy_objs["propagator"]
+
+    for block in range(nblocks):
+        for t in range(walkers.stack[0].nslice):
+            propagator.propagate_walkers(walkers, hamiltonian, trial, debug=True)
+            # Reuse the auxiliary fields drawn by the new propagator.
+            legacy_walkers = legacy_propagate_walkers(
+                legacy_hamiltonian,
+                legacy_trial,
+                legacy_walkers,
+                legacy_propagator,
+                xi=propagator.xi,
+            )
+
+            if t > 0:
+                pcontrol.pop_control(walkers, mpi_handler.comm)
+                legacy_walkers.pop_control(mpi_handler.comm)
+
+        walkers.reset(trial)  # Reset stack, weights, phase.
+        legacy_walkers.reset(legacy_trial)
+
+    for iw in range(walkers.nwalkers):
+        assert numpy.allclose(walkers.Ga[iw], legacy_walkers.walkers[iw].G[0])
+        assert numpy.allclose(walkers.Gb[iw], legacy_walkers.walkers[iw].G[1])
+        assert numpy.allclose(walkers.weight[iw], legacy_walkers.walkers[iw].weight)
+        assert numpy.allclose(
+            walkers.unscaled_weight[iw], legacy_walkers.walkers[iw].unscaled_weight
+        )
+
+
+if __name__ == "__main__":
+    # Direct execution runs only the full-rank checks, in this order.
+    for run_case in (test_pair_branch_batch, test_comb_batch):
+        run_case()
+
+    # The low-rank variants are still work in progress:
+    # test_pair_branch_batch_lowrank()
+    # test_comb_batch_lowrank()
diff --git a/ipie/addons/thermal/walkers/tests/test_thermal_walkers.py b/ipie/addons/thermal/walkers/tests/test_thermal_walkers.py
new file mode 100644
index 00000000..f082e670
--- /dev/null
+++ b/ipie/addons/thermal/walkers/tests/test_thermal_walkers.py
@@ -0,0 +1,219 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+import pytest
+from typing import Union
+
+try:
+    from ipie.addons.thermal.utils.legacy_testing import build_legacy_generic_test_case_handlers
+    from ipie.addons.thermal.utils.legacy_testing import legacy_propagate_walkers
+
+    _no_cython = False
+
+except ModuleNotFoundError:
+    _no_cython = True
+
+from ipie.config import MPI
+from ipie.addons.thermal.estimators.generic import local_energy_generic_cholesky
+from ipie.addons.thermal.estimators.thermal import one_rdm_from_G
+from ipie.addons.thermal.utils.testing import build_generic_test_case_handlers
+
+from ipie.legacy.estimators.generic import (
+    local_energy_generic_cholesky as legacy_local_energy_generic_cholesky,
+)
+from ipie.legacy.estimators.thermal import one_rdm_from_G as legacy_one_rdm_from_G
+
+comm = MPI.COMM_WORLD
+
+
+@pytest.mark.skipif(_no_cython, reason="Need to build cython modules.")
+@pytest.mark.unit
+def test_thermal_walkers_fullrank():
+    """Freshly built full-rank walkers must agree with the legacy walkers.
+
+    For every walker the local energy, both spin Green's functions and both spin
+    stack overlaps are compared to 10 decimal places.
+    """
+    # System size.
+    nelec = (5, 5)  # (nup, ndown)
+    nbasis = 10
+
+    # Thermal AFQMC settings.
+    mu = -10.0
+    beta = 0.1
+    timestep = 0.01
+    nwalkers = 10
+    lowrank = False
+
+    mf_trial = True
+    complex_integrals = False
+    debug = True
+    verbose = True
+    seed = 7
+    numpy.random.seed(seed)
+
+    # Keyword arguments shared by the new and the legacy builders.
+    shared = {
+        "nwalkers": nwalkers,
+        "lowrank": lowrank,
+        "mf_trial": mf_trial,
+        "seed": seed,
+        "verbose": verbose,
+    }
+
+    # Objects under test.
+    objs = build_generic_test_case_handlers(
+        nelec,
+        nbasis,
+        mu,
+        beta,
+        timestep,
+        complex_integrals=complex_integrals,
+        debug=debug,
+        **shared,
+    )
+    trial = objs["trial"]
+    hamiltonian = objs["hamiltonian"]
+    walkers = objs["walkers"]
+
+    # Legacy reference objects, built on the same Hamiltonian.
+    legacy_objs = build_legacy_generic_test_case_handlers(
+        hamiltonian, comm, nelec, mu, beta, timestep, **shared
+    )
+    legacy_system = legacy_objs["system"]
+    legacy_trial = legacy_objs["trial"]
+    legacy_hamiltonian = legacy_objs["hamiltonian"]
+    legacy_walkers = legacy_objs["walkers"]
+
+    check = numpy.testing.assert_almost_equal
+    for iw in range(walkers.nwalkers):
+        reference = legacy_walkers.walkers[iw]
+
+        # Local energies from the one-body density matrices.
+        P = one_rdm_from_G(numpy.array([walkers.Ga[iw], walkers.Gb[iw]]))
+        eloc = local_energy_generic_cholesky(hamiltonian, P)
+
+        legacy_P = legacy_one_rdm_from_G(numpy.array(reference.G))
+        legacy_eloc = legacy_local_energy_generic_cholesky(
+            legacy_system, legacy_hamiltonian, legacy_P
+        )
+        check(legacy_eloc, eloc, decimal=10)
+
+        # Green's functions, spin by spin.
+        for spin, G in enumerate((walkers.Ga[iw], walkers.Gb[iw])):
+            check(reference.G[spin], G, decimal=10)
+
+        # Stack overlaps, spin by spin.
+        for spin in (0, 1):
+            check(reference.stack.ovlp[spin], walkers.stack[iw].ovlp[spin], decimal=10)
+
+
+# TODO: Lowrank code is WIP.
+# @pytest.mark.skipif(_no_cython, reason="Need to build cython modules.")
+# @pytest.mark.unit
+def test_thermal_walkers_lowrank():
+    """Freshly built low-rank walkers compared with the legacy walkers (WIP).
+
+    Same checks as the full-rank test -- local energy, both spin Green's
+    functions and both spin stack overlaps, to 10 decimal places -- with
+    ``lowrank=True`` and a non-mean-field trial.
+    """
+    # System size.
+    nelec = (5, 5)  # (nup, ndown)
+    nbasis = 10
+
+    # Thermal AFQMC settings.
+    mu = -10.0
+    beta = 0.1
+    timestep = 0.01
+    nwalkers = 10
+    lowrank = True
+
+    mf_trial = False
+    # NOTE(review): `diagonal` is set here but is not forwarded to either builder
+    # below -- confirm whether that is intended.
+    diagonal = True
+    complex_integrals = False
+    debug = True
+    verbose = True
+    seed = 7
+    numpy.random.seed(seed)
+
+    # Keyword arguments shared by the new and the legacy builders.
+    shared = {
+        "nwalkers": nwalkers,
+        "lowrank": lowrank,
+        "mf_trial": mf_trial,
+        "seed": seed,
+        "verbose": verbose,
+    }
+
+    # Objects under test.
+    objs = build_generic_test_case_handlers(
+        nelec,
+        nbasis,
+        mu,
+        beta,
+        timestep,
+        complex_integrals=complex_integrals,
+        debug=debug,
+        **shared,
+    )
+    trial = objs["trial"]
+    hamiltonian = objs["hamiltonian"]
+    walkers = objs["walkers"]
+
+    # Legacy reference objects, built on the same Hamiltonian.
+    legacy_objs = build_legacy_generic_test_case_handlers(
+        hamiltonian, comm, nelec, mu, beta, timestep, **shared
+    )
+    legacy_system = legacy_objs["system"]
+    legacy_trial = legacy_objs["trial"]
+    legacy_hamiltonian = legacy_objs["hamiltonian"]
+    legacy_walkers = legacy_objs["walkers"]
+
+    check = numpy.testing.assert_almost_equal
+    for iw in range(walkers.nwalkers):
+        reference = legacy_walkers.walkers[iw]
+
+        # Local energies from the one-body density matrices.
+        P = one_rdm_from_G(numpy.array([walkers.Ga[iw], walkers.Gb[iw]]))
+        eloc = local_energy_generic_cholesky(hamiltonian, P)
+
+        legacy_P = legacy_one_rdm_from_G(numpy.array(reference.G))
+        legacy_eloc = legacy_local_energy_generic_cholesky(
+            legacy_system, legacy_hamiltonian, legacy_P
+        )
+        check(legacy_eloc, eloc, decimal=10)
+
+        # Green's functions, spin by spin.
+        for spin, G in enumerate((walkers.Ga[iw], walkers.Gb[iw])):
+            check(reference.G[spin], G, decimal=10)
+
+        # Stack overlaps, spin by spin.
+        for spin in (0, 1):
+            check(reference.stack.ovlp[spin], walkers.stack[iw].ovlp[spin], decimal=10)
+
+
+if __name__ == "__main__":
+    # Direct execution runs only the full-rank check.
+    test_thermal_walkers_fullrank()
+    # The low-rank variant is still work in progress:
+    # test_thermal_walkers_lowrank()
diff --git a/ipie/addons/thermal/walkers/uhf_walkers.py b/ipie/addons/thermal/walkers/uhf_walkers.py
new file mode 100644
index 00000000..9348f422
--- /dev/null
+++ b/ipie/addons/thermal/walkers/uhf_walkers.py
@@ -0,0 +1,159 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fmalone@google.com>
+#          Joonho Lee
+#
+
+import numpy
+import scipy.linalg
+
+from ipie.addons.thermal.estimators.particle_number import particle_number
+from ipie.addons.thermal.estimators.thermal import one_rdm_from_G
+from ipie.addons.thermal.estimators.greens_function import greens_function_qr_strat
+from ipie.addons.thermal.walkers.stack import PropagatorStack
+from ipie.utils.misc import update_stack
+from ipie.walkers.base_walkers import BaseWalkers
+from ipie.addons.thermal.trial.one_body import OneBody
+
+
+class UHFThermalWalkers(BaseWalkers):
+    """Batch of UHF-style thermal walkers, each backed by its own PropagatorStack.
+
+    `Ga` / `Gb` store the walkers' Green's functions, shape (nwalkers, nbasis, nbasis).
+    """
+
+    def __init__(
+        self,
+        trial: OneBody,
+        nbasis: int,
+        nwalkers: int,
+        stack_size=None,
+        lowrank: bool = False,
+        lowrank_thresh: float = 1e-6,
+        mpi_handler=None,
+        verbose: bool = False,
+    ):
+        """Initialise every walker's stack and Green's function from `trial.dmat`.
+
+        `stack_size` defaults to `trial.stack_size`; a value that does not divide
+        `trial.nslice` is replaced by the result of `update_stack`.
+        """
+        assert isinstance(trial, OneBody)
+        super().__init__(nwalkers, verbose=verbose)
+
+        self.nbasis = nbasis
+        self.mpi_handler = mpi_handler
+        self.nslice = trial.nslice
+        self.stack_size = trial.stack_size if stack_size is None else stack_size
+
+        if (self.nslice // self.stack_size) * self.stack_size != self.nslice:
+            if verbose:
+                print("# Input stack size does not divide number of slices.")
+            self.stack_size = update_stack(self.stack_size, self.nslice, verbose)
+
+        if verbose and self.stack_size > trial.stack_size:
+            print("# Walker stack size differs from that estimated from trial density matrix.")
+            print(f"# Be careful. cond(BT)**stack_size: {trial.cond ** self.stack_size:10.3e}.")
+
+        self.stack_length = self.nslice // self.stack_size
+        self.lowrank = lowrank
+        self.lowrank_thresh = lowrank_thresh
+
+        g_shape = (self.nwalkers, self.nbasis, self.nbasis)
+        self.Ga = numpy.zeros(shape=g_shape, dtype=numpy.complex128)
+        self.Gb = numpy.zeros(shape=g_shape, dtype=numpy.complex128)
+        self.Ghalf = None
+
+        # Norm of the off-diagonal part of trial.dmat[0]; only that block is checked.
+        max_diff_diag = numpy.linalg.norm((numpy.diag(trial.dmat[0].diagonal()) - trial.dmat[0]))
+        self.diagonal_trial = bool(max_diff_diag < 1e-10)
+        if verbose:
+            if self.diagonal_trial:
+                print("# Trial density matrix is diagonal.")
+            else:
+                print("# Trial density matrix is not diagonal.")
+            print(f"# Walker stack size: {self.stack_size}")
+            print(f"# Using low rank trick: {self.lowrank}")
+
+        self.stack = [
+            PropagatorStack(
+                self.stack_size,
+                self.nslice,
+                self.nbasis,
+                numpy.complex128,
+                trial.dmat,
+                trial.dmat_inv,
+                diagonal=self.diagonal_trial,
+                lowrank=self.lowrank,
+                thresh=self.lowrank_thresh,
+            )
+            for _ in range(self.nwalkers)
+        ]
+
+        # Initialise all propagators to the trial density matrix.
+        for iw in range(self.nwalkers):
+            self.stack[iw].set_all(trial.dmat)
+            greens_function_qr_strat(self, iw)
+            self.stack[iw].G[0] = self.Ga[iw]
+            self.stack[iw].G[1] = self.Gb[iw]
+
+        # Determinants of the initial Green's functions; shape (nwalkers,).
+        self.M0a = numpy.array(
+            [scipy.linalg.det(self.Ga[iw], check_finite=False) for iw in range(self.nwalkers)]
+        )
+        self.M0b = numpy.array(
+            [scipy.linalg.det(self.Gb[iw], check_finite=False) for iw in range(self.nwalkers)]
+        )
+
+        for iw in range(self.nwalkers):
+            self.stack[iw].ovlp = numpy.array([1.0 / self.M0a[iw], 1.0 / self.M0b[iw]])
+
+        self.hybrid_energy = 0.0
+        if verbose:
+            for iw in range(self.nwalkers):
+                P = one_rdm_from_G(numpy.array([self.Ga[iw], self.Gb[iw]]))
+                nav = particle_number(P)
+                print(f"# Trial electron number for {iw}-th walker: {nav}")
+
+        self.buff_names += ["Ga", "Gb"]
+        self.buff_size = round(self.set_buff_size_single_walker() / float(self.nwalkers))
+        self.walker_buffer = numpy.zeros(self.buff_size, dtype=numpy.complex128)
+
+    def calc_greens_function(self, iw, slice_ix=None, inplace=True):
+        """Return the Green's function for walker `iw`.
+
+        With `lowrank` the stored `stack[iw].G` is returned as is; otherwise the
+        call is forwarded to `greens_function_qr_strat`.
+        """
+        if self.lowrank:
+            return self.stack[iw].G  # G[0] = Ga, G[1] = Gb
+        return greens_function_qr_strat(self, iw, slice_ix=slice_ix, inplace=inplace)
+
+    def reset(self, trial):
+        """Restore unit weights/phases and rebuild each stack from `trial.dmat`."""
+        self.weight = numpy.ones(self.nwalkers)
+        self.phase = numpy.ones(self.nwalkers, dtype=numpy.complex128)
+
+        for iw in range(self.nwalkers):
+            self.stack[iw].reset()
+            self.stack[iw].set_all(trial.dmat)
+            self.calc_greens_function(iw)
+
+    # For compatibility with BaseWalkers class.
+    def reortho(self):
+        """Intentionally a no-op."""
+
+    def reortho_batched(self):
+        """Intentionally a no-op."""
diff --git a/ipie/analysis/extraction.py b/ipie/analysis/extraction.py
index f9103afd..ec33ac8b 100755
--- a/ipie/analysis/extraction.py
+++ b/ipie/analysis/extraction.py
@@ -230,7 +230,10 @@ def extract_test_data_hdf5(filename, skip=10):
         data = extract_mixed_estimates(filename)
         # use list so can json serialise easily.
         data = data.drop(["Iteration", "Time"], axis=1)[::skip].to_dict(orient="list")
-    data["sys_info"] = get_metadata(filename)["sys_info"]
+    try:
+        data["sys_info"] = get_metadata(filename)["sys_info"]
+    except KeyError:
+        print("\n# No 'sys_info' metadata!")
     try:
         mrdm = extract_rdm(filename, est_type="mixed", rdm_type="one_rdm")
     except (KeyError, TypeError, AttributeError):
diff --git a/ipie/estimators/energy.py b/ipie/estimators/energy.py
index d8a6936b..8a5eb13f 100644
--- a/ipie/estimators/energy.py
+++ b/ipie/estimators/energy.py
@@ -15,6 +15,8 @@
 # Author: Fionn Malone <fmalone@google.com>
 #
 
+from typing import Union
+
 import plum
 
 from ipie.estimators.estimator_base import EstimatorBase
@@ -30,6 +32,7 @@
     local_energy_multi_det_trial_wicks_batch_opt_chunked,
 )
 from ipie.hamiltonians.generic import GenericComplexChol, GenericRealChol
+from ipie.hamiltonians.generic_chunked import GenericRealCholChunked
 from ipie.systems.generic import Generic
 from ipie.trial_wavefunction.noci import NOCI
 from ipie.trial_wavefunction.particle_hole import (
@@ -45,7 +48,10 @@
 
 @plum.dispatch
 def local_energy(
-    system: Generic, hamiltonian: GenericRealChol, walkers: UHFWalkers, trial: SingleDet
+    system: Generic,
+    hamiltonian: Union[GenericRealChol, GenericRealCholChunked],
+    walkers: UHFWalkers,
+    trial: SingleDet,
 ):
     return local_energy_batch(system, hamiltonian, walkers, trial)
 
@@ -113,9 +119,6 @@ def __init__(
         trial=None,
         filename=None,
     ):
-        assert system is not None
-        assert ham is not None
-        assert trial is not None
         super().__init__()
         self._eshift = 0.0
         self.scalar_estimator = True
@@ -131,7 +134,7 @@ def __init__(
         self.print_to_stdout = True
         self.ascii_filename = filename
 
-    def compute_estimator(self, system, walkers, hamiltonian, trial, istep=1):
+    def compute_estimator(self, system=None, walkers=None, hamiltonian=None, trial=None):
         trial.calc_greens_function(walkers)
         # Need to be able to dispatch here
         energy = local_energy(system, hamiltonian, walkers, trial)
diff --git a/ipie/estimators/estimator_base.py b/ipie/estimators/estimator_base.py
index c78c9322..0cedfda5 100644
--- a/ipie/estimators/estimator_base.py
+++ b/ipie/estimators/estimator_base.py
@@ -15,13 +15,13 @@
 # Author: Fionn Malone <fmalone@google.com>
 #
 
-from abc import abstractmethod, ABCMeta
+from abc import ABCMeta, abstractmethod
 
 import numpy as np
 
-from ipie.utils.io import format_fixed_width_strings, format_fixed_width_floats
 from ipie.utils.backend import arraylib as xp
 from ipie.utils.backend import to_host
+from ipie.utils.io import format_fixed_width_floats, format_fixed_width_strings
 
 
 class EstimatorBase(metaclass=ABCMeta):
@@ -78,7 +78,7 @@ def size(self) -> int:
         size = 0
         for _, v in self._data.items():
             if isinstance(v, np.ndarray):
-                size += np.prod(v.shape)
+                size += int(np.prod(v.shape))
             else:
                 size += 1
         return size
@@ -89,8 +89,9 @@ def shape(self, shape) -> tuple:
         self._shape = shape
 
     @abstractmethod
-    def compute_estimator(self, system, walkers, hamiltonian, trial) -> np.ndarray:
-        ...
+    def compute_estimator(
+        self, system=None, walkers=None, hamiltonian=None, trial=None
+    ) -> np.ndarray: ...
 
     @property
     def names(self):
@@ -142,5 +143,4 @@ def zero(self):
             else:
                 self._data[k] = 0.0j
 
-    def post_reduce_hook(self, data) -> None:
-        ...
+    def post_reduce_hook(self, data) -> None: ...
diff --git a/ipie/estimators/generic.py b/ipie/estimators/generic.py
index 6f69d085..469abc3c 100644
--- a/ipie/estimators/generic.py
+++ b/ipie/estimators/generic.py
@@ -33,9 +33,7 @@ def local_energy_generic_opt(system, G, Ghalf=None, eri=None):
     assert eri is not None
 
     vipjq_aa = eri[0, : na**2 * M**2].reshape((na, M, na, M))
-    vipjq_bb = eri[0, na**2 * M**2 : na**2 * M**2 + nb**2 * M**2].reshape(
-        (nb, M, nb, M)
-    )
+    vipjq_bb = eri[0, na**2 * M**2 : na**2 * M**2 + nb**2 * M**2].reshape((nb, M, nb, M))
     vipjq_ab = eri[0, na**2 * M**2 + nb**2 * M**2 :].reshape((na, M, nb, M))
 
     Ga, Gb = Ghalf[0], Ghalf[1]
diff --git a/ipie/estimators/greens_function_single_det.py b/ipie/estimators/greens_function_single_det.py
index 7b6ed946..5a26a1b0 100644
--- a/ipie/estimators/greens_function_single_det.py
+++ b/ipie/estimators/greens_function_single_det.py
@@ -115,6 +115,8 @@ def greens_function_single_det_batch(walker_batch, trial, build_full=False):
         Overlap with trial.
     """
     ndown = walker_batch.ndown
+    walker_batch.phia = xp.ascontiguousarray(walker_batch.phia)
+    walker_batch.phib = xp.ascontiguousarray(walker_batch.phib)
 
     ovlp_a = xp.einsum("wmi,mj->wij", walker_batch.phia, trial.psi0a.conj(), optimize=True)
     ovlp_inv_a = xp.linalg.inv(ovlp_a)
diff --git a/ipie/estimators/handler.py b/ipie/estimators/handler.py
index a35dbfe1..cbe01fe9 100644
--- a/ipie/estimators/handler.py
+++ b/ipie/estimators/handler.py
@@ -190,7 +190,7 @@ def increment_file_number(self):
         self.index = self.index + 1
         self.filename = self.basename + f".{self.index}.h5"
 
-    def compute_estimators(self, comm, system, hamiltonian, trial, walker_batch):
+    def compute_estimators(self, system=None, hamiltonian=None, trial=None, walker_batch=None):
         """Update estimators with bached psi
 
         Parameters
@@ -201,7 +201,9 @@ def compute_estimators(self, comm, system, hamiltonian, trial, walker_batch):
         # TODO: generalize for different block groups (loop over groups)
         offset = self.num_walker_props
         for k, e in self.items():
-            e.compute_estimator(system, walker_batch, hamiltonian, trial)
+            e.compute_estimator(
+                system=system, walkers=walker_batch, hamiltonian=hamiltonian, trial=trial
+            )
             start = offset + self.get_offset(k)
             end = start + int(self[k].size)
             self.local_estimates[start:end] += e.data
diff --git a/ipie/estimators/local_energy_sd_chunked.py b/ipie/estimators/local_energy_sd_chunked.py
index c164d90e..2e2394e2 100644
--- a/ipie/estimators/local_energy_sd_chunked.py
+++ b/ipie/estimators/local_energy_sd_chunked.py
@@ -223,6 +223,7 @@ def exx_kernel_batch_rchol_gpu_low_mem(rchola_chunk, Ghalfa, buff):
     _buff = buff.ravel()
     for i in range(nchol_chunks):
         nchol_chunk = min(nchol_chunk_size, nchol_left)
+        nchol_left -= nchol_chunk
         chol_sls = slice(i * nchol_chunk_size, i * nchol_chunk_size + nchol_chunk)
         size = nwalkers * nchol_chunk * nalpha * nalpha
         # alpha-alpha
diff --git a/ipie/estimators/tests/test_estimators.py b/ipie/estimators/tests/test_estimators.py
index 59adc89f..0bd480fd 100644
--- a/ipie/estimators/tests/test_estimators.py
+++ b/ipie/estimators/tests/test_estimators.py
@@ -72,5 +72,5 @@ def test_estimator_handler():
         handler["energy1"] = estim
         handler.json_string = ""
         handler.initialize(comm)
-        handler.compute_estimators(comm, system, ham, trial, walker_batch)
-        handler.compute_estimators(comm, system, ham, trial, walker_batch)
+        handler.compute_estimators(system, ham, trial, walker_batch)
+        handler.compute_estimators(system, ham, trial, walker_batch)
diff --git a/ipie/estimators/tests/test_generic_chunked.py b/ipie/estimators/tests/test_generic_chunked.py
index 1e2ddcbe..f3db6700 100644
--- a/ipie/estimators/tests/test_generic_chunked.py
+++ b/ipie/estimators/tests/test_generic_chunked.py
@@ -21,7 +21,11 @@
 
 from ipie.config import MPI
 from ipie.estimators.local_energy_sd import local_energy_single_det_batch
-from ipie.estimators.local_energy_sd_chunked import local_energy_single_det_uhf_batch_chunked
+from ipie.estimators.local_energy_sd_chunked import (
+    local_energy_single_det_uhf_batch_chunked,
+    local_energy_single_det_uhf_batch_chunked_gpu,
+)
+from ipie.hamiltonians.generic_chunked import GenericRealCholChunked
 from ipie.hamiltonians.generic import Generic as HamGeneric
 from ipie.propagation.phaseless_generic import PhaselessGenericChunked
 from ipie.systems.generic import Generic
@@ -62,31 +66,36 @@ def test_generic_chunked():
 
     idx = numpy.triu_indices(nmo)
     cp_shape = (nmo * (nmo + 1) // 2, chol.shape[-1])
-    # chol_packed = numpy.zeros(cp_shape, dtype = chol.dtype)
     chol_packed = get_shared_array(comm, cp_shape, chol.dtype)
 
     if comm.rank == 0:
         pack_cholesky(idx[0], idx[1], chol_packed, chol)
 
     chol = chol.reshape((nmo * nmo, nchol))
+    mpi_handler = MPIHandler(nmembers=3, verbose=(rank == 0))
 
     system = Generic(nelec=nelec)
-    ham = HamGeneric(h1e=numpy.array([h1e, h1e]), chol=chol, ecore=enuc)
+    if comm.rank == 0:
+        print("# Chunking hamiltonian.")
+    ham = GenericRealCholChunked(
+        h1e=numpy.array([h1e, h1e]), chol=chol, ecore=enuc, handler=mpi_handler
+    )
+    ham_nochunk = HamGeneric(h1e=numpy.array([h1e, h1e]), chol=chol, ecore=enuc)
     _, wfn = get_random_nomsd(system.nup, system.ndown, ham.nbasis, ndet=1, cplx=False)
     trial = SingleDet(wfn[0], nelec, nmo)
+    trial.handler = mpi_handler
+    if comm.rank == 0:
+        print("# Chunking trial.")
     trial.half_rotate(ham)
 
-    trial.calculate_energy(system, ham)
+    # work around to compute the trial energy
+    trial_nochunk = SingleDet(wfn[0], nelec, nmo)
+    trial_nochunk.half_rotate(ham_nochunk)
+    trial_nochunk.calculate_energy(system, ham_nochunk)
 
-    qmc = dotdict({"dt": 0.005, "nstblz": 5, "batched": True, "nwalkers": nwalkers})
+    comm.barrier()
 
-    mpi_handler = MPIHandler(nmembers=3, verbose=(rank == 0))
-    if comm.rank == 0:
-        print("# Chunking hamiltonian.")
-    ham.chunk(mpi_handler)
-    if comm.rank == 0:
-        print("# Chunking trial.")
-    trial.chunk(mpi_handler)
+    qmc = dotdict({"dt": 0.005, "nstblz": 5, "batched": True, "nwalkers": nwalkers})
 
     prop = PhaselessGenericChunked(time_step=qmc["dt"])
     prop.build(ham, trial, mpi_handler=mpi_handler)
@@ -98,10 +107,10 @@ def test_generic_chunked():
     walkers.build(trial)
 
     for i in range(nsteps):
-        prop.propagate_walkers(walkers, ham, trial, trial.energy)
+        prop.propagate_walkers(walkers, ham, trial, trial_nochunk.energy)
         walkers.reortho()
 
-    energies = local_energy_single_det_batch(system, ham, walkers, trial)
+    energies = local_energy_single_det_batch(system, ham_nochunk, walkers, trial_nochunk)
 
     energies_chunked = local_energy_single_det_uhf_batch_chunked(system, ham, walkers, trial)
 
diff --git a/ipie/hamiltonians/generic.py b/ipie/hamiltonians/generic.py
index 44ed4043..d00873c5 100644
--- a/ipie/hamiltonians/generic.py
+++ b/ipie/hamiltonians/generic.py
@@ -19,6 +19,7 @@
 import numpy
 from ipie.hamiltonians.generic_base import GenericBase
 from ipie.utils.pack_numba import pack_cholesky
+from ipie.utils.backend import arraylib as xp
 
 from ipie.utils.io import (
     from_qmcpack_dense,
@@ -29,12 +30,12 @@
 
 def construct_h1e_mod(chol, h1e, h1e_mod):
     # Subtract one-body bit following reordering of 2-body operators.
-    # Eqn (17) of [Motta17]_
+    # Eqn (17) of [Motta17].
     nbasis = h1e.shape[-1]
     nchol = chol.shape[-1]
     chol_view = chol.reshape((nbasis, nbasis * nchol))
     # assert chol_view.__array_interface__['data'][0] == chol.__array_interface__['data'][0]
-    v0 = 0.5 * numpy.dot(
+    v0 = 0.5 * xp.dot(
         chol_view,
         chol_view.T.conj(),  # conjugate added to account for complex integrals
     )  # einsum('ikn,jkn->ij', chol_3, chol_3, optimize=True)
@@ -47,7 +48,7 @@ class GenericRealChol(GenericBase):
     Can be created by passing the one and two electron integrals directly.
     """
 
-    def __init__(self, h1e, chol, ecore=0.0, verbose=False):
+    def __init__(self, h1e, chol, ecore=0.0, shmem=False, chol_packed=None, verbose=False):
         assert (
             h1e.shape[0] == 2
         )  # assuming each spin component is given. this should be fixed for GHF...?
@@ -60,19 +61,25 @@ def __init__(self, h1e, chol, ecore=0.0, verbose=False):
         self.nfields = self.nchol
         assert self.nbasis**2 == chol.shape[0]
 
-        self.chol = self.chol.reshape((self.nbasis, self.nbasis, self.nchol))
         self.sym_idx = numpy.triu_indices(self.nbasis)
-        cp_shape = (self.nbasis * (self.nbasis + 1) // 2, self.chol.shape[-1])
-        self.chol_packed = numpy.zeros(cp_shape, dtype=self.chol.dtype)
-        pack_cholesky(self.sym_idx[0], self.sym_idx[1], self.chol_packed, self.chol)
-        self.chol = self.chol.reshape((self.nbasis * self.nbasis, self.nchol))
+        self.sym_idx_i = self.sym_idx[0].copy()
+        self.sym_idx_j = self.sym_idx[1].copy()
+        if not shmem:
+            self.chol = self.chol.reshape((self.nbasis, self.nbasis, self.nchol))
+            cp_shape = (self.nbasis * (self.nbasis + 1) // 2, self.chol.shape[-1])
+            self.chol_packed = numpy.zeros(cp_shape, dtype=self.chol.dtype)
+            pack_cholesky(self.sym_idx[0], self.sym_idx[1], self.chol_packed, self.chol)
+            self.chol = self.chol.reshape((self.nbasis * self.nbasis, self.nchol))
+        else:
+            self.chol = chol
+            self.chol_packed = chol_packed
 
         self.chunked = False
 
         # this is the one-body part that comes out of re-ordering the 2-body operators
         h1e_mod = numpy.zeros(self.H1.shape, dtype=self.H1.dtype)
         construct_h1e_mod(self.chol, self.H1, h1e_mod)
-        self.h1e_mod = h1e_mod
+        self.h1e_mod = xp.array(h1e_mod)
 
         if verbose:
             mem = self.chol.nbytes / (1024.0**3)
@@ -102,7 +109,7 @@ def __init__(self, h1e, chol, ecore=0.0, verbose=False):
 
         self.chol = numpy.array(chol, dtype=numpy.complex128)  # [M^2, nchol]
         self.nchol = self.chol.shape[-1]
-        self.nfields = self.nchol * 2
+        self.nfields = 2 * self.nchol
         assert self.nbasis**2 == chol.shape[0]
 
         self.chunked = False
@@ -110,7 +117,7 @@ def __init__(self, h1e, chol, ecore=0.0, verbose=False):
         # this is the one-body part that comes out of re-ordering the 2-body operators
         h1e_mod = numpy.zeros(self.H1.shape, dtype=self.H1.dtype)
         construct_h1e_mod(self.chol, self.H1, h1e_mod)
-        self.h1e_mod = h1e_mod
+        self.h1e_mod = xp.array(h1e_mod)
 
         # We need to store A and B integrals
         self.chol = self.chol.reshape((self.nbasis, self.nbasis, self.nchol))
@@ -143,11 +150,11 @@ def hijkl(self, i, j, k, l):  # (ik|jl) somehow physicist notation - terrible!!
         return numpy.dot(chol_ik, chol_lj.conj())
 
 
-def Generic(h1e, chol, ecore=0.0, verbose=False):
+def Generic(h1e, chol, ecore=0.0, shmem=False, chol_packed=None, verbose=False):
     if chol.dtype == numpy.dtype("complex128"):
         return GenericComplexChol(h1e, chol, ecore, verbose)
     elif chol.dtype == numpy.dtype("float64"):
-        return GenericRealChol(h1e, chol, ecore, verbose)
+        return GenericRealChol(h1e, chol, ecore, shmem, chol_packed, verbose)
 
 
 def read_integrals(integral_file):
diff --git a/ipie/hamiltonians/generic_base.py b/ipie/hamiltonians/generic_base.py
index 8ada1c58..37b6402a 100644
--- a/ipie/hamiltonians/generic_base.py
+++ b/ipie/hamiltonians/generic_base.py
@@ -48,14 +48,14 @@ def chunk(self, handler, verbose=False):
         chol_idxs = [i for i in range(self.nchol)]
         self.chol_idxs_chunk = handler.scatter_group(chol_idxs)
 
-        # if handler.srank == 0:  # creating copies for every rank = 0!!!!
-        self.chol_packed = self.chol_packed.T.copy()  # [chol, M^2]
+        if handler.srank == 0:  # creating copies for every rank = 0!!!!
+            self.chol_packed = self.chol_packed.T.copy()  # [chol, M^2]
+        else:
+            self.chol_packed = self.chol_packed.T  # [chol, M^2]
         handler.comm.barrier()
 
         self.chol_packed_chunk = handler.scatter_group(self.chol_packed)  # distribute over chol
 
-        # if handler.srank == 0:
-        self.chol_packed = self.chol_packed.T.copy()  # [M^2, chol]
         handler.comm.barrier()
 
         self.chol_packed_chunk = self.chol_packed_chunk.T.copy()  # [M^2, chol_chunk]
@@ -63,19 +63,19 @@ def chunk(self, handler, verbose=False):
         tot_size = handler.allreduce_group(self.chol_packed_chunk.size)
 
         assert self.chol_packed.size == tot_size
+        del self.chol_packed
 
         # distributing chol
-        # if handler.comm.rank == 0:
-        self.chol = self.chol.T.copy()  # [chol, M^2]
+        if handler.srank == 0:
+            self.chol = self.chol.T.copy()  # [M^2, chol]
+        else:
+            self.chol = self.chol.T
         handler.comm.barrier()
-
         self.chol_chunk = handler.scatter_group(self.chol)  # distribute over chol
-
-        # if handler.comm.rank == 0:
-        self.chol = self.chol.T.copy()  # [M^2, chol]
         handler.comm.barrier()
 
-        self.chol_chunk = self.chol_chunk.T.copy()  # [M^2, chol_chunk]
+        self.chol_chunk = self.chol_chunk.T  # [M^2, chol_chunk]
 
         tot_size = handler.allreduce_group(self.chol_chunk.size)
         assert self.chol.size == tot_size
+        del self.chol
diff --git a/ipie/hamiltonians/generic_chunked.py b/ipie/hamiltonians/generic_chunked.py
new file mode 100644
index 00000000..719ec23b
--- /dev/null
+++ b/ipie/hamiltonians/generic_chunked.py
@@ -0,0 +1,130 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Joonho Lee
+#
+#
+
+import numpy
+from ipie.hamiltonians.generic_base import GenericBase
+from ipie.utils.pack_numba import pack_cholesky
+from ipie.utils.backend import arraylib as xp
+from ipie.utils.mpi import make_splits_displacements
+
+try:
+    from mpi4py import MPI
+except ImportError:
+    MPI = None
+
+
+def construct_h1e_mod(chol, h1e, h1e_mod, handler):
+    """Fill `h1e_mod` in place with `h1e` minus the one-body shift `v0`.
+
+    `v0` is half of the local `chol` contraction (Eqn (17) of [Motta17]_),
+    summed over `handler.scomm` before it is subtracted from both spin blocks.
+    """
+    norb = h1e.shape[-1]
+    # View the local Cholesky block as [M, M * nchol]; the product below is
+    # einsum('ikn,jkn->ij', chol_3, chol_3, optimize=True) with a conjugate.
+    flat = chol.reshape((norb, norb * chol.shape[-1]))
+    local_v0 = 0.5 * numpy.dot(flat, flat.T.conj())  # conj() covers complex integrals
+    v0 = handler.scomm.allreduce(local_v0, op=MPI.SUM)
+    for spin in (0, 1):
+        h1e_mod[spin, :, :] = h1e[spin] - v0
+
+
+class GenericRealCholChunked(GenericBase):
+    """Ab-initio Hamiltonian with 8-fold real symmetric integrals whose Cholesky
+    vectors are kept as per-rank chunks split over `handler.scomm`.
+
+    Build it from the full `chol` ([M^2, nchol]; packed and chunked here) or from
+    this rank's ready-made `chol_chunk` and `chol_packed_chunk`.
+    """
+
+    def __init__(
+        self,
+        h1e,
+        chol=None,
+        chol_chunk=None,
+        chol_packed_chunk=None,
+        ecore=0.0,
+        handler=None,
+        verbose=False,
+    ):
+        only_full = chol is not None and chol_chunk is None and chol_packed_chunk is None
+        only_chunks = chol is None and chol_chunk is not None and chol_packed_chunk is not None
+        if not (only_full or only_chunks):
+            raise ValueError(
+                "Invalid argument combination. Provide either 'chol' alone or both 'chol_chunk' and 'chol_packed_chunk' together."
+            )
+        if handler is None:  # every step below communicates through the handler
+            raise ValueError("GenericRealCholChunked requires an MPI handler.")
+        super().__init__(h1e, ecore, verbose)
+        self.handler = handler
+        assert h1e.shape[0] == 2  # one block per spin is assumed (GHF not handled)
+
+        self.sym_idx = numpy.triu_indices(self.nbasis)
+        self.sym_idx_i = self.sym_idx[0].copy()
+        self.sym_idx_j = self.sym_idx[1].copy()
+
+        if chol is not None:
+            # Pack the upper triangle, then chunk() scatters both arrays and
+            # deletes the full copies, leaving chol_chunk / chol_packed_chunk.
+            self.nchol = chol.shape[-1]
+            self.chol = chol.reshape((self.nbasis, self.nbasis, self.nchol))
+            cp_shape = (self.nbasis * (self.nbasis + 1) // 2, self.nchol)
+            self.chol_packed = numpy.zeros(cp_shape, dtype=self.chol.dtype)
+            pack_cholesky(self.sym_idx[0], self.sym_idx[1], self.chol_packed, self.chol)
+            self.chol = self.chol.reshape((self.nbasis * self.nbasis, self.nchol))
+            self.chunk(handler)
+        else:
+            self.chol_chunk = chol_chunk  # [M^2, nchol_chunk]
+            self.chol_packed_chunk = chol_packed_chunk
+
+        # Total vector count = sum of the local chunk widths over scomm.
+        self.nchol_chunk = self.chol_chunk.shape[-1]
+        self.nchol = handler.scomm.allreduce(self.nchol_chunk, op=MPI.SUM)
+        self.nfields = self.nchol
+        self.chol_idxs_chunk = handler.scatter_group(list(range(self.nchol)))
+
+        assert self.chol_chunk.dtype == numpy.dtype("float64")
+        assert self.chol_packed_chunk.dtype == numpy.dtype("float64")
+        assert self.nbasis**2 == self.chol_chunk.shape[0]
+        self.chunked = True
+
+        # One-body shift that comes out of re-ordering the 2-body operators.
+        h1e_mod = numpy.zeros(self.H1.shape, dtype=self.H1.dtype)
+        construct_h1e_mod(self.chol_chunk, self.H1, h1e_mod, handler)
+        self.h1e_mod = xp.array(h1e_mod)
+
+        split_size = make_splits_displacements(self.nchol, handler.nmembers)[0]
+        self.chunk_displacements = [0] + numpy.cumsum(split_size).tolist()
+
+        if verbose:
+            mem = self.chol_chunk.nbytes / (1024.0**3)
+            mem_packed = self.chol_packed_chunk.nbytes / (1024.0**3)
+            print("# Number of orbitals: %d" % self.nbasis)
+            print(f"# Approximate memory required by Cholesky vectors {mem:f} GB")
+            print(f"# Approximate memory required by packed Cholesky vectors {mem_packed:f} GB")
+            print(f"# Approximate memory required total {mem_packed + mem:f} GB")
+            print("# Number of Cholesky vectors: %d" % (self.nchol))
+            print("# Number of fields: %d" % (self.nchol))
+            print("# Finished setting up GenericRealCholChunked object.")
+
+    def hijkl(self, i, j, k, l):  # (ik|jl) somehow physicist notation - terrible!!
+        """Return (ik|jl). Collective: every rank of `handler.scomm` must call it."""
+        ik, jl = i * self.nbasis + k, j * self.nbasis + l
+        # The full `chol` is not kept, so sum each rank's partial dot product.
+        partial = numpy.dot(self.chol_chunk[ik], self.chol_chunk[jl])
+        return self.handler.scomm.allreduce(partial, op=MPI.SUM)
diff --git a/ipie/hamiltonians/utils.py b/ipie/hamiltonians/utils.py
index 99d264f5..15ab863f 100644
--- a/ipie/hamiltonians/utils.py
+++ b/ipie/hamiltonians/utils.py
@@ -72,6 +72,9 @@ def get_hamiltonian(filename, scomm, verbose=False, pack_chol=True):
         if scomm.rank == 0 and pack_chol:
             pack_cholesky(idx[0], idx[1], chol_packed, chol)
         scomm.Barrier()
+        chol_pack_shmem = get_shared_array(scomm, shape, dtype)
+        if scomm.rank == 0:
+            chol_pack_shmem[:] = chol_packed[:]
     else:
         dtype = chol.dtype
         cp_shape = (nbsf * (nbsf + 1) // 2, nchol)
@@ -84,7 +87,12 @@ def get_hamiltonian(filename, scomm, verbose=False, pack_chol=True):
     if verbose:
         print(f"# Time to pack Cholesky vectors: {time.time() - start:.6f}")
 
-    ham = Generic(h1e=hcore, chol=chol, ecore=enuc, verbose=verbose)
+    if shmem and pack_chol:
+        ham = Generic(
+            h1e=hcore, chol=chol, ecore=enuc, shmem=True, chol_packed=chol_packed, verbose=verbose
+        )
+    else:
+        ham = Generic(h1e=hcore, chol=chol, ecore=enuc, verbose=verbose)
 
     return ham
 
diff --git a/ipie/legacy/estimators/generic.py b/ipie/legacy/estimators/generic.py
index 1682e1e9..910a5f92 100644
--- a/ipie/legacy/estimators/generic.py
+++ b/ipie/legacy/estimators/generic.py
@@ -190,6 +190,37 @@ def _exx_compute_batch(rchol_a, rchol_b, GaT_stacked, GbT_stacked, lwalker):
     return exx_vec_b + exx_vec_a
 
 
+# FDM: deprecated remove?
+def local_energy_generic_opt(system, G, Ghalf=None, eri=None):
+    """Local energy evaluated from the half-rotated ERI tensor `eri`.
+
+    Row 0 of `eri` stores the aa, bb and ab blocks back to back. Returns the
+    tuple (total energy, one-body + ecore, two-body).
+    """
+    nocc_a, nocc_b, nbsf = system.nup, system.ndown, system.nbasis
+    assert eri is not None
+
+    # Offsets at which the aa and bb blocks end inside the flattened row.
+    end_aa = nocc_a**2 * nbsf**2
+    end_bb = end_aa + nocc_b**2 * nbsf**2
+    v_aa = eri[0, :end_aa].reshape((nocc_a, nbsf, nocc_a, nbsf))
+    v_bb = eri[0, end_aa:end_bb].reshape((nocc_b, nbsf, nocc_b, nbsf))
+    v_ab = eri[0, end_bb:].reshape((nocc_a, nbsf, nocc_b, nbsf))
+
+    gh_a, gh_b = Ghalf[0], Ghalf[1]
+    # One-body part: element-wise product of H1 and G, summed for each spin.
+    e1b = numpy.sum(system.H1[0] * G[0]) + numpy.sum(system.H1[1] * G[1])
+    # J-type ("ir,js") and K-type ("is,jr") contractions; no K term for ab.
+    e_j_aa = 0.5 * numpy.einsum("irjs,ir,js", v_aa, gh_a, gh_a)
+    e_j_bb = 0.5 * numpy.einsum("irjs,ir,js", v_bb, gh_b, gh_b)
+    e_j_ab = numpy.einsum("irjs,ir,js", v_ab, gh_a, gh_b)
+    e_k_aa = -0.5 * numpy.einsum("irjs,is,jr", v_aa, gh_a, gh_a)
+    e_k_bb = -0.5 * numpy.einsum("irjs,is,jr", v_bb, gh_b, gh_b)
+    e2b = e_j_aa + e_j_bb + e_j_ab + e_k_aa + e_k_bb
+
+    return (e1b + e2b + system.ecore, e1b + system.ecore, e2b)
+
+
 def local_energy_generic_cholesky_opt_batched(
     system,
     ham,
diff --git a/ipie/legacy/estimators/local_energy.py b/ipie/legacy/estimators/local_energy.py
index 8de3ce93..53225747 100644
--- a/ipie/legacy/estimators/local_energy.py
+++ b/ipie/legacy/estimators/local_energy.py
@@ -5,10 +5,10 @@
     from ipie.legacy.estimators.ueg import local_energy_ueg
 except ImportError as e:
     print(e)
-from ipie.estimators.generic import local_energy_generic_opt
 from ipie.legacy.estimators.ci import get_hmatel
 from ipie.legacy.estimators.generic import (
     local_energy_generic,
+    local_energy_generic_opt,
     local_energy_generic_cholesky,
     local_energy_generic_cholesky_opt,
     local_energy_generic_cholesky_opt_stochastic,
diff --git a/ipie/legacy/estimators/ueg.py b/ipie/legacy/estimators/ueg.py
index dde01443..210bb0ac 100644
--- a/ipie/legacy/estimators/ueg.py
+++ b/ipie/legacy/estimators/ueg.py
@@ -25,7 +25,7 @@ def coulomb_greens_function(nq, kpq_i, kpq, pmq_i, pmq, Gkpq, Gpmq, G):
             Gpmq[iq] += G[i, idxpmq]
 
 
-def local_energy_ueg(system, ham, G, Ghalf=None, two_rdm=None):
+def local_energy_ueg(system, ham, G, Ghalf=None, two_rdm=None, debug=False):
     """Local energy computation for uniform electron gas
     Parameters
     ----------
@@ -69,20 +69,31 @@ def local_energy_ueg(system, ham, G, Ghalf=None, two_rdm=None):
 
     if two_rdm is None:
         two_rdm = numpy.zeros((2, 2, len(ham.qvecs)), dtype=numpy.complex128)
-    two_rdm[0, 0] = numpy.multiply(Gkpq[0], Gpmq[0]) - Gprod[0]
-    essa = (1.0 / (2.0 * ham.vol)) * ham.vqvec.dot(two_rdm[0, 0])
 
+    two_rdm[0, 0] = numpy.multiply(Gkpq[0], Gpmq[0]) - Gprod[0]
     two_rdm[1, 1] = numpy.multiply(Gkpq[1], Gpmq[1]) - Gprod[1]
-    essb = (1.0 / (2.0 * ham.vol)) * ham.vqvec.dot(two_rdm[1, 1])
-
     two_rdm[0, 1] = numpy.multiply(Gkpq[0], Gpmq[1])
     two_rdm[1, 0] = numpy.multiply(Gkpq[1], Gpmq[0])
-    eos = (1.0 / (2.0 * ham.vol)) * ham.vqvec.dot(two_rdm[0, 1]) + (
-        1.0 / (2.0 * ham.vol)
-    ) * ham.vqvec.dot(two_rdm[1, 0])
 
+    essa = (1.0 / (2.0 * ham.vol)) * ham.vqvec.dot(two_rdm[0, 0])
+    essb = (1.0 / (2.0 * ham.vol)) * ham.vqvec.dot(two_rdm[1, 1])
+    eos = (1.0 / (2.0 * ham.vol)) * (
+            ham.vqvec.dot(two_rdm[0, 1]) + ham.vqvec.dot(two_rdm[1, 0]))
     pe = essa + essb + eos
 
+    if debug:
+        ecoul, exx = 0., 0. 
+
+        for s1 in range(2):
+            exx -= (1.0 / (2.0 * ham.vol)) * ham.vqvec.dot(Gprod[s1])
+
+            for s2 in range(2):
+                ecoul += (1.0 / (2.0 * ham.vol)) * ham.vqvec.dot(numpy.multiply(Gkpq[s1], Gpmq[s2]))
+
+        print(f'\n# ueg ecoul = {ecoul}')
+        print(f'# ueg exx = {exx}')
+        print(f'# ueg e2 = {(ecoul + exx)}')
+
     return (ke + pe, ke, pe)
 
 
diff --git a/ipie/legacy/hamiltonians/_generic.py b/ipie/legacy/hamiltonians/_generic.py
index e361deb5..6b1a2ef8 100644
--- a/ipie/legacy/hamiltonians/_generic.py
+++ b/ipie/legacy/hamiltonians/_generic.py
@@ -140,7 +140,7 @@ def __init__(
             if self.verbose:
                 print("# mixed_precision is used for the propagation")
 
-        if isrealobj(self.chol_vecs.dtype):
+        if isrealobj(self.chol_vecs):
             if verbose:
                 print("# Found real Choleksy integrals.")
             self.cplx_chol = False
@@ -314,7 +314,7 @@ def construct_h1e_mod(chol, h1e, h1e_mod):
     chol_view = chol.reshape((nbasis, nbasis * nchol))
     # assert chol_view.__array_interface__['data'][0] == chol.__array_interface__['data'][0]
     v0 = 0.5 * numpy.dot(
-        chol_view, chol_view.T
+        chol_view, chol_view.T.conj() # Conjugate added to account for complex integrals
     )  # einsum('ikn,jkn->ij', chol_3, chol_3, optimize=True)
     h1e_mod[0, :, :] = h1e[0] - v0
     h1e_mod[1, :, :] = h1e[1] - v0
diff --git a/ipie/legacy/propagation/pw.py b/ipie/legacy/propagation/pw.py
index 379d4f9f..87445311 100644
--- a/ipie/legacy/propagation/pw.py
+++ b/ipie/legacy/propagation/pw.py
@@ -5,7 +5,7 @@
 import numpy
 import scipy
 
-from ipie.estimators.utils import convolve, scipy_fftconvolve
+from ipie.legacy.estimators.utils import convolve, scipy_fftconvolve
 from ipie.legacy.propagation.operations import kinetic_real
 
 
diff --git a/ipie/legacy/propagation/tests/test_planewave.py b/ipie/legacy/propagation/tests/test_planewave.py
index e05b2604..ed12be56 100644
--- a/ipie/legacy/propagation/tests/test_planewave.py
+++ b/ipie/legacy/propagation/tests/test_planewave.py
@@ -1,5 +1,3 @@
-import os
-
 import numpy
 import pytest
 
diff --git a/ipie/legacy/qmc/thermal_afqmc.py b/ipie/legacy/qmc/thermal_afqmc.py
index f4e60a9c..2fb15e54 100644
--- a/ipie/legacy/qmc/thermal_afqmc.py
+++ b/ipie/legacy/qmc/thermal_afqmc.py
@@ -116,10 +116,10 @@ def __init__(
                 self.sha1, self.branch, self.local_mods = get_git_info()
             else:
                 self.sha1 = "None"
-            if verbose:
-                self.sys_info = print_env_info(
-                    self.sha1, self.branch, self.local_mods, self.uuid, comm.size
-                )
+            #if verbose:
+            #    self.sys_info = print_env_info(
+            #        self.sha1, self.branch, self.local_mods, self.uuid, comm.size
+            #    )
         # Hack - this is modified later if running in parallel on
         # initialisation.
         self.root = comm.rank == 0
diff --git a/ipie/legacy/thermal_propagation/continuous.py b/ipie/legacy/thermal_propagation/continuous.py
index a97c6dc4..cfc6dc7e 100644
--- a/ipie/legacy/thermal_propagation/continuous.py
+++ b/ipie/legacy/thermal_propagation/continuous.py
@@ -84,7 +84,7 @@ def __init__(self, options, qmc, system, hamiltonian, trial, verbose=False, lowr
         if verbose:
             print("# Finished setting up propagator.")
 
-    def two_body_propagator(self, walker, system, trial):
+    def two_body_propagator(self, walker, system, trial, xi=None):
         r"""Continuous Hubbard-Statonovich transformation.
 
         Parameters
@@ -97,7 +97,9 @@ def two_body_propagator(self, walker, system, trial):
             Trial wavefunction object.
         """
         # Normally distrubted auxiliary fields.
-        xi = numpy.random.normal(0.0, 1.0, system.nfields)
+        if xi is None: # For debugging.
+            xi = numpy.random.normal(0.0, 1.0, system.nfields)
+
         if self.force_bias:
             P = one_rdm_from_G(walker.G)
             xbar = self.propagator.construct_force_bias(system, P, trial)
@@ -157,7 +159,7 @@ def exponentiate(self, VHS, debug=False):
             print(f"DIFF: {(c2 - phi).sum() / c2.size: 10.8e}")
         return phi
 
-    def propagate_walker_free(self, system, walker, trial, eshift=0):
+    def propagate_walker_free(self, system, walker, trial, eshift=0, xi=None):
         r"""Free projection for continuous HS transformation.
 
         .. Warning::
@@ -173,7 +175,7 @@ def propagate_walker_free(self, system, walker, trial, eshift=0):
         state : :class:`state.State`
             Simulation state.
         """
-        (cmf, cfb, xmxbar, VHS) = self.two_body_propagator(walker, system, trial)
+        (cmf, cfb, xmxbar, VHS) = self.two_body_propagator(walker, system, trial, xi=xi)
         BV = self.exponentiate(VHS)
 
         B = numpy.array([BV.dot(self.BH1[0]), BV.dot(self.BH1[1])])
@@ -206,7 +208,7 @@ def propagate_walker_free(self, system, walker, trial, eshift=0):
         except ZeroDivisionError:
             walker.weight = 0.0
 
-    def propagate_walker_phaseless(self, system, walker, trial, eshift=0):
+    def propagate_walker_phaseless(self, system, walker, trial, eshift=0, xi=None):
         r"""Propagate walker using phaseless approximation.
 
         Uses importance sampling and the hybrid method.
@@ -223,7 +225,7 @@ def propagate_walker_phaseless(self, system, walker, trial, eshift=0):
             Trial wavefunction object.
         """
 
-        (cmf, cfb, xmxbar, VHS) = self.two_body_propagator(walker, system, trial)
+        (cmf, cfb, xmxbar, VHS) = self.two_body_propagator(walker, system, trial, xi=xi)
         BV = self.exponentiate(VHS)
 
         B = numpy.array([BV.dot(self.BH1[0]), BV.dot(self.BH1[1])])
@@ -232,11 +234,15 @@ def propagate_walker_phaseless(self, system, walker, trial, eshift=0):
         # Compute determinant ratio det(1+A')/det(1+A).
         # 1. Current walker's green's function.
         tix = walker.stack.ntime_slices
+        G = walker.greens_function(None, slice_ix=tix, inplace=False)
         # 2. Compute updated green's function.
         walker.stack.update_new(B)
         walker.greens_function(None, slice_ix=tix, inplace=True)
         # 3. Compute det(G/G')
-        M0 = walker.M0
+        M0 = [
+                scipy.linalg.det(G[0], check_finite=False),
+                scipy.linalg.det(G[1], check_finite=False)
+        ]
         Mnew = [
             scipy.linalg.det(walker.G[0], check_finite=False),
             scipy.linalg.det(walker.G[1], check_finite=False),
diff --git a/ipie/legacy/thermal_propagation/generic.py b/ipie/legacy/thermal_propagation/generic.py
index 7454b5df..9c4c63de 100644
--- a/ipie/legacy/thermal_propagation/generic.py
+++ b/ipie/legacy/thermal_propagation/generic.py
@@ -23,7 +23,7 @@ class GenericContinuous(object):
     qmc : :class:`pie.qmc.options.QMCOpts`
         QMC options.
     system : :class:`pie.system.System`
-        System object.
+        Despite the name, this is the Hamiltonian object, not a System.
     trial : :class:`pie.trial_wavefunctioin.Trial`
         Trial wavefunction object.
     verbose : bool
@@ -91,7 +91,10 @@ def construct_mean_field_shift(self, system, P):
             mf_shift = 1j * P[0].ravel() * system.chol_vecs
             mf_shift += 1j * P[1].ravel() * system.chol_vecs
         else:
+            # View chol_vecs as (nchol, nbasis, nbasis) for the einsum; undone right after.
+            system.chol_vecs = system.chol_vecs.T.reshape(system.nchol, system.nbasis, system.nbasis)
             mf_shift = 1j * numpy.einsum("lpq,spq->l", system.chol_vecs, P)
+            system.chol_vecs = system.chol_vecs.reshape(system.nchol, system.nbasis**2).T
         return mf_shift
 
     def construct_one_body_propagator(self, system, dt):
diff --git a/ipie/legacy/thermal_propagation/planewave.py b/ipie/legacy/thermal_propagation/planewave.py
index 8b258c38..a0a92871 100644
--- a/ipie/legacy/thermal_propagation/planewave.py
+++ b/ipie/legacy/thermal_propagation/planewave.py
@@ -105,7 +105,6 @@ def construct_one_body_propagator(self, hamiltonian, dt):
         """
         H1 = hamiltonian.h1e_mod
         I = numpy.identity(H1[0].shape[0], dtype=H1.dtype)
-        print(f"hamiltonian.mu = {hamiltonian.mu}")
         # No spin dependence for the moment.
         self.BH1 = numpy.array(
             [
@@ -228,7 +227,7 @@ def propagate_greens_function(self, walker, B, Binv):
             walker.G[0] = B[0].dot(walker.G[0]).dot(Binv[0])
             walker.G[1] = B[1].dot(walker.G[1]).dot(Binv[1])
 
-    def two_body_propagator(self, walker, hamiltonian, force_bias=True):
+    def two_body_propagator(self, walker, hamiltonian, force_bias=True, xi=None):
         """It appliese the two-body propagator
         Parameters
         ----------
@@ -249,7 +248,8 @@ def two_body_propagator(self, walker, hamiltonian, force_bias=True):
         """
 
         # Normally distrubted auxiliary fields.
-        xi = numpy.random.normal(0.0, 1.0, hamiltonian.nfields)
+        if xi is None:
+            xi = numpy.random.normal(0.0, 1.0, hamiltonian.nfields)
 
         # Optimal force bias.
         xbar = numpy.zeros(hamiltonian.nfields)
@@ -472,7 +472,7 @@ def propagate_walker_free_low_rank(self, system, walker, trial, eshift=0, force_
         except ZeroDivisionError:
             walker.weight = 0.0
 
-    def propagate_walker_phaseless_full_rank(self, hamiltonian, walker, trial, eshift=0):
+    def propagate_walker_phaseless_full_rank(self, hamiltonian, walker, trial, eshift=0, xi=None):
         # """Phaseless propagator
         # Parameters
         # ----------
@@ -486,7 +486,7 @@ def propagate_walker_phaseless_full_rank(self, hamiltonian, walker, trial, eshif
         # -------
         # """
 
-        (cmf, cfb, xmxbar, VHS) = self.two_body_propagator(walker, hamiltonian, True)
+        (cmf, cfb, xmxbar, VHS) = self.two_body_propagator(walker, hamiltonian, True, xi=xi)
         BV = self.exponentiate(VHS)  # could use a power-series method to build this
 
         B = numpy.array(
@@ -525,7 +525,10 @@ def propagate_walker_phaseless_full_rank(self, hamiltonian, walker, trial, eshif
             walker.greens_function(None, slice_ix=tix, inplace=True)
 
         # 3. Compute det(G/G')
-        M0 = walker.M0
+        M0 = [
+                scipy.linalg.det(G[0], check_finite=False),
+                scipy.linalg.det(G[1], check_finite=False)
+        ]
         Mnew = numpy.array(
             [
                 scipy.linalg.det(walker.G[0], check_finite=False),
@@ -554,7 +557,7 @@ def propagate_walker_phaseless_full_rank(self, hamiltonian, walker, trial, eshif
         except ZeroDivisionError:
             walker.weight = 0.0
 
-    def propagate_walker_phaseless_low_rank(self, hamiltonian, walker, trial, eshift=0):
+    def propagate_walker_phaseless_low_rank(self, hamiltonian, walker, trial, eshift=0, xi=None):
         # """Phaseless propagator
         # Parameters
         # ----------
@@ -567,7 +570,7 @@ def propagate_walker_phaseless_low_rank(self, hamiltonian, walker, trial, eshift
         # Returns
         # -------
         # """
-        (cmf, cfb, xmxbar, VHS) = self.two_body_propagator(walker, hamiltonian, True)
+        (cmf, cfb, xmxbar, VHS) = self.two_body_propagator(walker, hamiltonian, True, xi=xi)
         BV = self.exponentiate(VHS)  # could use a power-series method to build this
 
         B = numpy.array(
diff --git a/ipie/legacy/trial_density_matrices/mean_field.py b/ipie/legacy/trial_density_matrices/mean_field.py
index 46c01bb4..1fe8e1e2 100644
--- a/ipie/legacy/trial_density_matrices/mean_field.py
+++ b/ipie/legacy/trial_density_matrices/mean_field.py
@@ -41,7 +41,7 @@ def thermal_hartree_fock(self, system, beta):
         mu_old = self.mu
         P = self.P.copy()
         if self.verbose:
-            print("# Determining Thermal Hartree--Fock Density Matrix.")
+            print("# Determining Thermal Hartree-Fock Density Matrix.")
         for it in range(self.max_macro_it):
             if self.verbose:
                 print(f"# Macro iteration: {it}")
@@ -49,7 +49,7 @@ def thermal_hartree_fock(self, system, beta):
             rho = numpy.array([scipy.linalg.expm(-dt * HMF[0]), scipy.linalg.expm(-dt * HMF[1])])
             if self.find_mu:
                 mu = find_chemical_potential(
-                    system,
+                    system._alt_convention,
                     rho,
                     dt,
                     self.num_bins,
@@ -96,16 +96,16 @@ def scf(self, system, beta, mu, P):
             change = numpy.linalg.norm(Pnew - Pold)
             if change < self.deps:
                 break
-            if self.verbose:
-                N = particle_number(P).real
-                E = local_energy(system, P)[0].real
-                S = entropy(beta, mu, HMF)
-                omega = E - mu * N - 1.0 / beta * S
-                print(
-                    " # Iteration: {:4d} dP: {:13.8e} Omega: {:13.8e}".format(
-                        it, change, omega.real
-                    )
-                )
+            #if self.verbose:
+            #    N = particle_number(P).real
+            #    E = local_energy(system, P)[0].real
+            #    S = entropy(beta, mu, HMF)
+            #    omega = E - mu * N - 1.0 / beta * S
+            #    print(
+            #        " # Iteration: {:4d} dP: {:13.8e} Omega: {:13.8e}".format(
+            #            it, change, omega.real
+            #        )
+            #    )
             Pold = Pnew.copy()
         if self.verbose:
             N = particle_number(P).real
diff --git a/ipie/legacy/trial_wavefunction/multi_slater.py b/ipie/legacy/trial_wavefunction/multi_slater.py
index 1553c9f2..d198a5c0 100644
--- a/ipie/legacy/trial_wavefunction/multi_slater.py
+++ b/ipie/legacy/trial_wavefunction/multi_slater.py
@@ -30,6 +30,7 @@ def __init__(
         nbasis=None,
         options={},
         init=None,
+        cplx=False,
         verbose=False,
         orbs=None,
     ):
@@ -48,7 +49,7 @@ def __init__(
         else:
             self.psi = wfn[1]
             imag_norm = numpy.sum(self.psi.imag.ravel() * self.psi.imag.ravel())
-            if imag_norm <= 1e-8:
+            if (not cplx) and (imag_norm <= 1e-8):
                 # print("# making trial wavefunction MO coefficient real")
                 self.psi = numpy.array(self.psi.real, dtype=numpy.float64)
             self.coeffs = numpy.array(wfn[0], dtype=numpy.complex128)
diff --git a/ipie/lib/wicks/CMakeLists.txt b/ipie/lib/wicks/CMakeLists.txt
index cec6ff2e..7f019eaa 100644
--- a/ipie/lib/wicks/CMakeLists.txt
+++ b/ipie/lib/wicks/CMakeLists.txt
@@ -7,6 +7,7 @@ add_library(wicks_helper SHARED
     ${PROJECT_SOURCE_DIR}/density_matrix.c
     )
 
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3")  # append so caller-supplied flags are kept
 set_target_properties(wicks_helper PROPERTIES
     LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}
     )
diff --git a/ipie/lib/wicks/determinant_utils.c b/ipie/lib/wicks/determinant_utils.c
index 016f15f4..fe0e1d7f 100644
--- a/ipie/lib/wicks/determinant_utils.c
+++ b/ipie/lib/wicks/determinant_utils.c
@@ -32,24 +32,21 @@ void encode_det(
     )
 {
   for (int i = 0; i < DET_LEN; i++) {
-    det[i] = 0;
+    det[i] = (u_int64_t)(0);
   }
   u_int64_t mask = 1;
   for (int i = 0; i < nocca; i++) {
     int spin_occ = 2*occa[i];
     int det_ind = spin_occ / DET_SIZE;
     int det_pos = spin_occ % DET_SIZE;
-    /*printf("alpha : %d %d %d\n", spin_occ, det_ind, det_pos);*/
     det[det_ind] |= (mask << det_pos);
   }
   for (int i = 0; i < noccb; i++) {
     int spin_occ = 2*occb[i] + 1;
     int det_ind = spin_occ / DET_SIZE;
     int det_pos = spin_occ % DET_SIZE;
-    /*printf("beta:  %d %d %d\n", spin_occ, det_ind, det_pos);*/
     det[det_ind] |= (mask << det_pos);
   }
-  /*printf("end\n");*/
 }
 
 int count_set_bits(const u_int64_t *det)
@@ -72,7 +69,6 @@ int get_excitation_level(
   int excit_level = 0;
   for (int i = 0; i < DET_LEN; i++) {
       excit_level += count_set_bits_single(deta[i]^detb[i]);
-      /*printf("%d %d %llu %llu %d\n", i, excit_level, deta[i], detb[i], count_set_bits_single(deta[i]^detb[i]));*/
   }
   return excit_level / 2;
 }
@@ -100,16 +96,6 @@ void decode_det(
   }
 }
 
-/*bool is_set(*/
-    /*u_int64_t *det,*/
-    /*int loc*/
-    /*)*/
-/*{*/
-  /*bool set = false;*/
-  /*for (int i = 0; i < DET_LEN; i++) {*/
-    /*set &= det & (loc << diff[0])*/
-  /*}*/
-/*}*/
 
 void build_set_mask(
     u_int64_t *mask,
@@ -154,7 +140,7 @@ void get_ia(
     int* ia)
 {
   int diff[2];
-  u_int64_t delta[2];
+  u_int64_t delta[DET_LEN];
   for (int i = 0; i < DET_LEN; i++) {
     delta[i] = det_bra[i] ^ det_ket[i];
   }
@@ -181,10 +167,10 @@ int get_perm_ia(
   u_int64_t occ_to_count[DET_LEN];
   u_int64_t loc = 1;
   for (int i = 0; i < DET_LEN; i++) {
-    and_mask[i] = 0; // all bits set to 0
-    mask_i[i] = 0; // all bits set to 0
-    mask_a[i] = 0; // all bits set to 0
-    occ_to_count[i] = 0;
+    and_mask[i] = (u_int64_t)(0); // all bits set to 0
+    mask_i[i] = (u_int64_t)(0); // all bits set to 0
+    mask_a[i] = (u_int64_t)(0); // all bits set to 0
+    occ_to_count[i] = (u_int64_t)(0);
   }
   // check bit a is occupied or bit i is unoccupied.
   // else just count set bits between i and a.
diff --git a/ipie/lib/wicks/determinant_utils.h b/ipie/lib/wicks/determinant_utils.h
index 1882caff..711047d2 100644
--- a/ipie/lib/wicks/determinant_utils.h
+++ b/ipie/lib/wicks/determinant_utils.h
@@ -2,51 +2,19 @@
 
 #include <stdlib.h>
 
-#define DET_LEN   2
+#define DET_LEN 4
 #define DET_SIZE 64
 
-void encode_dets(
-    const int *occsa,
-    const int *occsb,
-    u_int64_t *dets,
-    const size_t nocca,
-    const size_t noccb,
-    const size_t ndet);
-void encode_det(
-    const int *occa,
-    const int *occb,
-    u_int64_t  *det,
-    const size_t nocca,
-    const size_t noccb);
+void encode_dets(const int *occsa, const int *occsb, u_int64_t *dets,
+                 const size_t nocca, const size_t noccb, const size_t ndet);
+void encode_det(const int *occa, const int *occb, u_int64_t *det,
+                const size_t nocca, const size_t noccb);
 int count_set_bits(const u_int64_t *det);
 int count_set_bits_single(const u_int64_t det);
-int get_excitation_level(
-    const u_int64_t *deta,
-    const u_int64_t *detb);
-void decode_det(
-    const u_int64_t *det,
-    int *occs,
-    const size_t nel);
-void get_ia(
-    u_int64_t *det_bra,
-    u_int64_t *det_ket,
-    int *ia);
-int get_perm_ia(
-    u_int64_t *det,
-    int i,
-    int a);
-void bitwise_subtract(
-    u_int64_t *deta,
-    u_int64_t *detb,
-    u_int64_t *result
-    );
-void bitwise_and(
-    u_int64_t *deta,
-    u_int64_t *detb,
-    u_int64_t *result
-    );
-void build_set_mask(
-    u_int64_t *mask,
-    int det_loc,
-    int det_ind);
-
+int get_excitation_level(const u_int64_t *deta, const u_int64_t *detb);
+void decode_det(const u_int64_t *det, int *occs, const size_t nel);
+void get_ia(u_int64_t *det_bra, u_int64_t *det_ket, int *ia);
+int get_perm_ia(u_int64_t *det, int i, int a);
+void bitwise_subtract(u_int64_t *deta, u_int64_t *detb, u_int64_t *result);
+void bitwise_and(u_int64_t *deta, u_int64_t *detb, u_int64_t *result);
+void build_set_mask(u_int64_t *mask, int det_loc, int det_ind);
diff --git a/ipie/lib/wicks/test_wicks_helper.py b/ipie/lib/wicks/test_wicks_helper.py
index efb9bda7..8edd47d0 100644
--- a/ipie/lib/wicks/test_wicks_helper.py
+++ b/ipie/lib/wicks/test_wicks_helper.py
@@ -1,4 +1,3 @@
-
 # Copyright 2022 The ipie Developers. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,100 +19,101 @@
 import pytest
 
 try:
-    from ipie.lib.wicks.wicks_helper import (compute_opdm, count_set_bits,
-                                             decode_det, encode_det,
-                                             encode_dets, get_excitation_level,
-                                             get_ia, get_perm_ia,
-                                             print_bitstring)
+    from ipie.lib.wicks.wicks_helper import (
+        compute_opdm,
+        count_set_bits,
+        decode_det,
+        encode_det,
+        encode_dets,
+        get_excitation_level,
+        get_ia,
+        get_perm_ia,
+    )
+
     no_wicks = False
-except ImportError:
+except Exception:  # any load failure skips the wicks tests; Ctrl-C/SystemExit still propagate
     no_wicks = True
 
+
 @pytest.mark.wicks
 @pytest.mark.skipif(no_wicks, reason="lib.wicks not found.")
 @pytest.mark.parametrize(
-        "test_input,expected",
-        [
-            (([], [0,1,2,3]), ['0b10101010', '0b0']),
-            (([0,3], [0,1,2,3]), ['0b11101011', '0b0']),
-            (([1], [0,1,2,3]), ['0b10101110', '0b0']),
-            (([], [0,1]), ['0b1010', '0b0']),
-            (([1,35], [0,34]), ['0b110', '0b1100000'])
-        ]
-            )
+    "test_input,expected",
+    [
+        (([], [0, 1, 2, 3]), ["0b10101010", "0b0", "0b0", "0b0"]),
+        (([0, 3], [0, 1, 2, 3]), ["0b11101011", "0b0", "0b0", "0b0"]),
+        (([1], [0, 1, 2, 3]), ["0b10101110", "0b0", "0b0", "0b0"]),
+        (([], [0, 1]), ["0b1010", "0b0", "0b0", "0b0"]),
+        (([0, 32, 64, 96], [1, 33, 65, 97]), ["0b1001", "0b1001", "0b1001", "0b1001"]),
+    ],
+)
 def test_encode_det(test_input, expected):
     a, b = test_input
     a = np.array(a, dtype=np.int32)
     b = np.array(b, dtype=np.int32)
     det = encode_det(a, b)
     for i in range(len(det)):
-        # print(i, bin(det[i]), expected[i])
-        # # print(i, bin(det[i]), expected[i])
         assert bin(det[i]) == expected[i]
 
+
 @pytest.mark.wicks
 @pytest.mark.skipif(no_wicks, reason="lib.wicks not found.")
 def test_encode_dets():
-    occsa = np.array(
-            [[0,1,2,3], [0,1,2,4]],
-            dtype=np.int32
-            )
-    occsb = np.array(
-            [[0, 1, 2, 3], [0,1,2,3]],
-            dtype=np.int32
-            )
+    occsa = np.array([[0, 1, 2, 3], [0, 1, 2, 4]], dtype=np.int32)
+    occsb = np.array([[0, 1, 2, 3], [0, 1, 2, 3]], dtype=np.int32)
     dets = encode_dets(occsa, occsb)
     for i in range(len(occsa)):
         d = encode_det(occsa[i], occsb[i])
         assert (dets[i] == d).all()
 
 
-
 @pytest.mark.wicks
 @pytest.mark.skipif(no_wicks, reason="lib.wicks not found.")
 @pytest.mark.parametrize(
-        "test_input,expected",
-        [
-            ([15,0], 4),
-            ([37,0], 3),
-            ([1001,0], 7)
-        ]
-            )
+    "test_input,expected",
+    [
+        ([15, 0, 0, 0], 4),
+        ([37, 0, 0, 0], 3),
+        ([1001, 1001, 1001, 1001], 28),
+        ([(1 << 64) - 1] * 4, 64 * 4),
+    ],
+)
 def test_count_set_bits(test_input, expected):
     nset = count_set_bits(np.array(test_input, dtype=np.uint64))
     assert nset == expected
 
+
 @pytest.mark.wicks
 @pytest.mark.skipif(no_wicks, reason="lib.wicks not found.")
 @pytest.mark.parametrize(
-        "test_input,expected",
-        [
-            # (([2,0], [1,0]), 1),
-            # (([12,0], [3,0]), 2),
-            (([1688832680394751, 2], [1125899906842623, 0]), 2)
-        ]
-            )
+    "test_input,expected",
+    [
+        (([2, 0, 0, 0], [1, 0, 0, 0]), 1),
+        (([12, 0, 0, 0], [3, 0, 0, 0]), 2),
+        (([1688832680394751, 2, 0, 0], [1125899906842623, 0, 0, 0]), 2),
+        (([(1 << 64) - 1] * 4, [(1 << 64) - 1] * 3 + [((1 << 64) - 1) >> 8]), 4),
+    ],
+)
 def test_get_excitation_level(test_input, expected):
-    # print(bin(test_input[0]), bin(test_input[1]))
-    # print(bin(test_input[0]^test_input[1]))
     nset = get_excitation_level(
-            np.array(test_input[0], dtype=np.uint64),
-            np.array(test_input[1], dtype=np.uint64)
-            )
+        np.array(test_input[0], dtype=np.uint64), np.array(test_input[1], dtype=np.uint64)
+    )
     assert nset == expected
 
+
 @pytest.mark.wicks
 @pytest.mark.skipif(no_wicks, reason="lib.wicks not found.")
 @pytest.mark.parametrize(
-        "test_input,expected",
-        [
-            (np.array([12,0], dtype=np.uint64), [2,3]),
-            (np.array([5,0], dtype=np.uint64), [0,2]),
-        ]
-            )
+    "test_input,expected",
+    [
+        (np.array([12, 0, 0, 0], dtype=np.uint64), [2, 3]),
+        (np.array([5, 0, 0, 0], dtype=np.uint64), [0, 2]),
+        (np.array([5, 0, 0, 0], dtype=np.uint64), [0, 2]),
+        (np.array([(1 << 64) - 1] * 4, dtype=np.uint64), list(range(64 * 4))),
+    ],
+)
 def test_decode_det(test_input, expected):
     out = np.zeros(len(expected), dtype=np.int32)
-    # print(out.size, out.shape, expected, np.array(expected).shape)
     decode_det(test_input, out)
     assert (out == expected).all()
 
@@ -122,53 +122,49 @@ def test_decode_det(test_input, expected):
 @pytest.mark.skipif(no_wicks, reason="lib.wicks not found.")
 def test_get_ia():
     # <1100|0^3|1010>
-    ket = encode_det(
-            np.array([], dtype=np.int32),
-            np.array([0,1], dtype=np.int32)
-            )
-    bra = encode_det(
-            np.array([0], dtype=np.int32),
-            np.array([0], dtype=np.int32)
-            )
+    ket = encode_det(np.array([], dtype=np.int32), np.array([0, 1], dtype=np.int32))
+    bra = encode_det(np.array([0], dtype=np.int32), np.array([0], dtype=np.int32))
     ia = np.zeros(2, dtype=np.int32)
     get_ia(bra, ket, ia)
     assert (ia == [3, 0]).all()
+    ket = [(1 << 64) - 1] * 4
+    bra = [(1 << 64) - 1] * 4
+    bra[1] = bra[1] >> 2
+    bra = np.array(bra, dtype=np.uint64)
+    ket = np.array(ket, dtype=np.uint64)
+    get_ia(bra, ket, ia)
+    assert (ia == [126, 127]).all()
+
 
 @pytest.mark.wicks
 @pytest.mark.skipif(no_wicks, reason="lib.wicks not found.")
 @pytest.mark.parametrize(
-        "test_input,expected",
-        [
-            ([[], [0,1], [3,0]], -1),
-            ([[0,14//2], [0,9//2], [14, 8]], -1),
-            ([[0,4//2], [1,13//2], [13, 17]], 1),
-        ]
-            )
+    "test_input,expected",
+    [
+        ([[], [0, 1], [3, 0]], -1),
+        ([[0, 14 // 2], [0, 9 // 2], [14, 8]], -1),
+        ([[0, 4 // 2], [1, 13 // 2], [13, 17]], 1),
+    ],
+)
 def test_get_perm_ia(test_input, expected):
     # perm(0^3|0101>) = -1
     # |0101> = 1010 (binary)
     o1, o2, ia = test_input
-    ket = encode_det(
-            np.array(o1, dtype=np.int32),
-            np.array(o2, dtype=np.int32)
-            )
-    print(bin(ket[0]), o1, o2, ia, 2*np.array(ia))
+    ket = encode_det(np.array(o1, dtype=np.int32), np.array(o2, dtype=np.int32))
     perm = get_perm_ia(ket, ia[0], ia[1])
     assert perm == expected
 
+
 @pytest.mark.wicks
 @pytest.mark.skipif(no_wicks, reason="lib.wicks not found.")
 def test_get_perm_ia_long():
-    # perm(0^3|0101>) = -1
-    # |0101> = 1010 (binary)
-    ket = np.array([3236962232172543, 0], dtype=np.uint64)
+    ket = np.array([3236962232172543, 0, 0, 0], dtype=np.uint64)
     out = np.zeros(50, dtype=np.int32)
     decode_det(ket, out)
-    print(out)
-    print(bin(ket[0]), 51, 47)
     perm = get_perm_ia(ket, 51, 47)
     assert perm == 1
 
+
 @pytest.mark.wicks
 @pytest.mark.skipif(no_wicks, reason="lib.wicks not found.")
 def test_compute_opdm():
@@ -176,25 +172,9 @@ def test_compute_opdm():
     # |0101> = 1010 (binary)
     coeffs = np.array([0.55, 0.1333, 0.001, 0.44])
     dets = []
-    dets.append(encode_det(
-            np.array([0], dtype=np.int32),
-            np.array([0], dtype=np.int32)
-            ))
-    dets.append(encode_det(
-            np.array([0], dtype=np.int32),
-            np.array([1], dtype=np.int32)
-            ))
-    dets.append(encode_det(
-            np.array([1], dtype=np.int32),
-            np.array([0], dtype=np.int32)
-            ))
-    dets.append(encode_det(
-            np.array([1], dtype=np.int32),
-            np.array([1], dtype=np.int32)
-            ))
+    dets.append(encode_det(np.array([0], dtype=np.int32), np.array([0], dtype=np.int32)))
+    dets.append(encode_det(np.array([0], dtype=np.int32), np.array([1], dtype=np.int32)))
+    dets.append(encode_det(np.array([1], dtype=np.int32), np.array([0], dtype=np.int32)))
+    dets.append(encode_det(np.array([1], dtype=np.int32), np.array([1], dtype=np.int32)))
     dets = np.array(dets, dtype=np.ulonglong)
-    P = compute_opdm(
-            coeffs,
-            dets,
-            4,
-            2)
+    P = compute_opdm(coeffs, dets, 4, 2)
diff --git a/ipie/lib/wicks/wicks_helper.py b/ipie/lib/wicks/wicks_helper.py
index da6fa5ca..1fc01dcc 100644
--- a/ipie/lib/wicks/wicks_helper.py
+++ b/ipie/lib/wicks/wicks_helper.py
@@ -26,7 +26,8 @@
     _wicks_helper = np.ctypeslib.load_library("libwicks_helper", _path)
 except OSError:
     raise ImportError
-DET_LEN = 2
+DET_LEN = 4
+
 
 
 def encode_dets(occsa, occsb):
@@ -57,9 +58,7 @@ def encode_dets(occsa, occsb):
     fun.argtypes = [
         ndpointer(shape=(ndets, nocca), dtype=ctypes.c_int, flags="C_CONTIGUOUS"),
         ndpointer(shape=(ndets, noccb), dtype=ctypes.c_int, flags="C_CONTIGUOUS"),
-        ndpointer(
-            shape=(ndets, DET_LEN), dtype=ctypes.c_ulonglong, flags="C_CONTIGUOUS"
-        ),
+        ndpointer(shape=(ndets, DET_LEN), dtype=ctypes.c_ulonglong, flags="C_CONTIGUOUS"),
         ctypes.c_size_t,
         ctypes.c_size_t,
         ctypes.c_size_t,
@@ -102,7 +101,7 @@ def encode_det(a, b):
         ctypes.c_size_t,
     ]
     out = np.zeros(DET_LEN, dtype=np.uint64)
-    det = _wicks_helper.encode_det(a, b, out, a.size, b.size)
+    _wicks_helper.encode_det(a, b, out, a.size, b.size)
     return out
 
 
@@ -121,9 +120,7 @@ def count_set_bits(a):
     """
     fun = _wicks_helper.count_set_bits
     fun.restype = ctypes.c_int
-    fun.argtypes = [
-        ndpointer(shape=(DET_LEN), dtype=ctypes.c_ulonglong, flags="C_CONTIGUOUS")
-    ]
+    fun.argtypes = [ndpointer(shape=(DET_LEN), dtype=ctypes.c_ulonglong, flags="C_CONTIGUOUS")]
     nset = _wicks_helper.count_set_bits(a)
     return nset
 
@@ -174,7 +171,6 @@ def decode_det(det, occs):
         ndpointer(shape=(occs.size), dtype=ctypes.c_int, flags="C_CONTIGUOUS"),
         ctypes.c_size_t,
     ]
-    print(det.size, det.shape, occs.size, occs.shape)
     _wicks_helper.decode_det(det, occs, occs.size)
 
 
@@ -228,7 +224,6 @@ def get_perm_ia(det_ket, i, a):
         ctypes.c_int,
         ctypes.c_int,
     ]
-    # print(det_ket, type(det_ket), det_ket.dtype)
     perm = _wicks_helper.get_perm_ia(det_ket, i, a)
     return perm
 
@@ -257,9 +252,7 @@ def compute_opdm(ci_coeffs, dets, norbs, nelec):
         fun.restype = None
         fun.argtypes = [
             ndpointer(np.complex128, flags="C_CONTIGUOUS"),
-            ndpointer(
-                shape=(ndets, DET_LEN), dtype=ctypes.c_ulonglong, flags="C_CONTIGUOUS"
-            ),
+            ndpointer(shape=(ndets, DET_LEN), dtype=ctypes.c_ulonglong, flags="C_CONTIGUOUS"),
             ndpointer(np.complex128, flags="C_CONTIGUOUS"),
             ndpointer(ctypes.c_int, flags="C_CONTIGUOUS"),
             ctypes.c_size_t,
@@ -271,9 +264,7 @@ def compute_opdm(ci_coeffs, dets, norbs, nelec):
         fun.restype = None
         fun.argtypes = [
             ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"),
-            ndpointer(
-                shape=(ndets, DET_LEN), dtype=ctypes.c_ulonglong, flags="C_CONTIGUOUS"
-            ),
+            ndpointer(shape=(ndets, DET_LEN), dtype=ctypes.c_ulonglong, flags="C_CONTIGUOUS"),
             ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"),
             ndpointer(ctypes.c_int, flags="C_CONTIGUOUS"),
             ctypes.c_size_t,
@@ -306,7 +297,6 @@ def convert_phase(occa, occb):
     ndet = len(occa)
     phases = np.zeros(ndet)
     for i in range(ndet):
-        doubles = list(set(occa[i]) & set(occb[i]))
         occa0 = np.array(occa[i])
         occb0 = np.array(occb[i])
 
@@ -320,11 +310,9 @@ def convert_phase(occa, occb):
     return phases
 
 
-def print_bitstring(bitstring, nbits=64):
+def get_bitstring(bitstring, nbits=64):
     mask = np.uint64(1)
     out = ""
     for bs in bitstring:
-        out += "".join(
-            "1" if bs & (mask << np.uint64(i)) else "0" for i in range(nbits)
-        )
+        out += "".join("1" if bs & (mask << np.uint64(i)) else "0" for i in range(nbits))
     return out[::-1]
diff --git a/ipie/propagation/force_bias.py b/ipie/propagation/force_bias.py
index 836904f2..66f72a4e 100644
--- a/ipie/propagation/force_bias.py
+++ b/ipie/propagation/force_bias.py
@@ -141,7 +141,7 @@ def construct_force_bias_batch_single_det_chunked(hamiltonian, walkers, trial, h
         Force bias.
     """
     assert hamiltonian.chunked
-    assert xp.isrealobj(trial._rchola)
+    assert xp.isrealobj(trial._rchola_chunk)
 
     Ghalfa = walkers.Ghalfa.reshape(walkers.nwalkers, walkers.nup * hamiltonian.nbasis)
     Ghalfb = walkers.Ghalfb.reshape(walkers.nwalkers, walkers.ndown * hamiltonian.nbasis)
diff --git a/ipie/propagation/operations.py b/ipie/propagation/operations.py
index f729520a..82a99a7d 100644
--- a/ipie/propagation/operations.py
+++ b/ipie/propagation/operations.py
@@ -103,8 +103,9 @@ def apply_exponential_batch(phi, VHS, exp_nmax):
     xp.copyto(Temp, phi)
     if config.get_option("use_gpu"):
         for n in range(1, exp_nmax + 1):
-            Temp = xp.einsum("wik,wkj->wij", VHS, Temp, optimize=True) / n
+            Temp = xp.matmul(VHS, Temp) / n  # matmul uses much less GPU memory than einsum
             phi += Temp
+        del Temp
     else:
         for iw in range(phi.shape[0]):
             for n in range(1, exp_nmax + 1):
diff --git a/ipie/propagation/phaseless_base.py b/ipie/propagation/phaseless_base.py
index 57f9b3ff..d11daa2f 100644
--- a/ipie/propagation/phaseless_base.py
+++ b/ipie/propagation/phaseless_base.py
@@ -5,15 +5,25 @@
 from ipie.propagation.continuous_base import ContinuousBase
 from ipie.propagation.operations import propagate_one_body
 from ipie.utils.backend import arraylib as xp
-from ipie.utils.backend import synchronize
+from ipie.utils.backend import synchronize, cast_to_device
 
 import plum
 from ipie.trial_wavefunction.wavefunction_base import TrialWavefunctionBase
 from ipie.hamiltonians.generic import GenericRealChol, GenericComplexChol
+from ipie.hamiltonians.generic_chunked import GenericRealCholChunked
+from typing import Union
+
+try:
+    from mpi4py import MPI
+except ImportError:
+    MPI = None
+from ipie.utils.mpi import make_splits_displacements
 
 
 @plum.dispatch
-def construct_one_body_propagator(hamiltonian: GenericRealChol, mf_shift: xp.ndarray, dt: float):
+def construct_one_body_propagator(
+    hamiltonian: Union[GenericRealChol, GenericRealCholChunked], mf_shift: xp.ndarray, dt: float
+):
     r"""Construct mean-field shifted one-body propagator.
 
     .. math::
@@ -29,10 +39,31 @@ def construct_one_body_propagator(hamiltonian: GenericRealChol, mf_shift: xp.nda
         Timestep.
     """
     nb = hamiltonian.nbasis
-    shift = 1j * numpy.einsum("mx,x->m", hamiltonian.chol, mf_shift).reshape(nb, nb)
-    H1 = hamiltonian.h1e_mod - numpy.array([shift, shift])
-    expH1 = numpy.array(
-        [scipy.linalg.expm(-0.5 * dt * H1[0]), scipy.linalg.expm(-0.5 * dt * H1[1])]
+    if hamiltonian.chunked:
+        start_n = hamiltonian.chunk_displacements[hamiltonian.handler.srank]
+        end_n = hamiltonian.chunk_displacements[hamiltonian.handler.srank + 1]
+        if hasattr(mf_shift, "get"):
+            shift = 1j * numpy.einsum(
+                "mx,x->m", hamiltonian.chol_chunk, mf_shift.get()[start_n:end_n]
+            ).reshape(nb, nb)
+        else:
+            shift = 1j * numpy.einsum(
+                "mx,x->m", hamiltonian.chol_chunk, mf_shift[start_n:end_n]
+            ).reshape(nb, nb)
+        if MPI is None:
+            raise ImportError("mpi4py is not installed.")
+        else:
+            shift = hamiltonian.handler.scomm.allreduce(shift, op=MPI.SUM)
+    else:
+        shift = 1j * numpy.einsum("mx,x->m", hamiltonian.chol, mf_shift).reshape(nb, nb)
+    shift = xp.array(shift)
+    H1 = hamiltonian.h1e_mod - xp.array([shift, shift])
+    if hasattr(H1, "get"):
+        H1_numpy = H1.get()
+    else:
+        H1_numpy = H1
+    expH1 = xp.array(
+        [scipy.linalg.expm(-0.5 * dt * H1_numpy[0]), scipy.linalg.expm(-0.5 * dt * H1_numpy[1])]
     )
     return expH1
 
@@ -41,9 +72,8 @@ def construct_one_body_propagator(hamiltonian: GenericRealChol, mf_shift: xp.nda
 def construct_one_body_propagator(hamiltonian: GenericComplexChol, mf_shift: xp.ndarray, dt: float):
     nb = hamiltonian.nbasis
     nchol = hamiltonian.nchol
-    shift = xp.zeros((nb, nb), dtype=hamiltonian.chol.dtype)
+    shift = numpy.zeros((nb, nb), dtype=hamiltonian.chol.dtype)
     shift = 1j * numpy.einsum("mx,x->m", hamiltonian.A, mf_shift[:nchol]).reshape(nb, nb)
-
     shift += 1j * numpy.einsum("mx,x->m", hamiltonian.B, mf_shift[nchol:]).reshape(nb, nb)
 
     H1 = hamiltonian.h1e_mod - numpy.array([shift, shift])
@@ -53,6 +83,49 @@ def construct_one_body_propagator(hamiltonian: GenericComplexChol, mf_shift: xp.
     return expH1
 
 
+@plum.dispatch
+def construct_mean_field_shift(hamiltonian: GenericRealCholChunked, trial: TrialWavefunctionBase):
+    r"""Compute mean field shift.
+
+    .. math::
+
+        \bar{v}_n = \sum_{ik\sigma} v_{(ik),n} G_{ik\sigma}
+
+    """
+    # hamiltonian.chol [X, M^2]
+    Gcharge = (trial.G[0] + trial.G[1]).ravel()
+    # Use numpy here to limit GPU memory use; otherwise large chol cases become a problem.
+    tmp_real = numpy.dot(hamiltonian.chol_chunk.T, Gcharge.real)
+    tmp_imag = numpy.dot(hamiltonian.chol_chunk.T, Gcharge.imag)
+
+    split_sizes, displacements = make_splits_displacements(hamiltonian.nchol, trial.handler.ssize)
+    split_sizes_np = numpy.array(split_sizes, dtype=int)
+    displacements_np = numpy.array(displacements, dtype=int)
+
+    recvbuf_real = numpy.zeros(hamiltonian.nchol, dtype=tmp_real.dtype)
+    recvbuf_imag = numpy.zeros(hamiltonian.nchol, dtype=tmp_imag.dtype)
+
+    # print(split_sizes_np, displacements_np)
+    if MPI is None:
+        raise ImportError("mpi4py is not installed.")
+    else:
+        trial.handler.scomm.Gatherv(
+            tmp_real, [recvbuf_real, split_sizes_np, displacements_np, MPI.DOUBLE], root=0
+        )
+        trial.handler.scomm.Gatherv(
+            tmp_imag, [recvbuf_imag, split_sizes_np, displacements_np, MPI.DOUBLE], root=0
+        )
+
+    trial.handler.scomm.Bcast(recvbuf_real, root=0)
+    trial.handler.scomm.Bcast(recvbuf_imag, root=0)
+
+    mf_shift = 1.0j * recvbuf_real - recvbuf_imag
+    # mf_shift_1 = numpy.load("../Test_Disk_nochunk/mf_shift.npy")
+    # print(f'mf_shift complete,{numpy.allclose(mf_shift, mf_shift_1)}')
+
+    return xp.array(mf_shift)
+
+
 @plum.dispatch
 def construct_mean_field_shift(hamiltonian: GenericRealChol, trial: TrialWavefunctionBase):
     r"""Compute mean field shift.
@@ -64,10 +137,11 @@ def construct_mean_field_shift(hamiltonian: GenericRealChol, trial: TrialWavefun
     """
     # hamiltonian.chol [X, M^2]
     Gcharge = (trial.G[0] + trial.G[1]).ravel()
+    # Use numpy here to limit GPU memory use; otherwise large chol cases become a problem.
     tmp_real = numpy.dot(hamiltonian.chol.T, Gcharge.real)
     tmp_imag = numpy.dot(hamiltonian.chol.T, Gcharge.imag)
     mf_shift = 1.0j * tmp_real - tmp_imag
-    return mf_shift
+    return xp.array(mf_shift)
 
 
 @plum.dispatch
@@ -156,9 +230,9 @@ def propagate_walkers_two_body(self, walkers, hamiltonian, trial):
         cfb = xp.einsum("wx,wx->w", xi, xbar) - 0.5 * xp.einsum("wx,wx->w", xbar, xbar)
 
         xshifted = xshifted.T.copy()
-
         self.apply_VHS(walkers, hamiltonian, xshifted)
 
+        # xp._default_memory_pool.free_all_blocks()
         return (cmf, cfb)
 
     def propagate_walkers(self, walkers, hamiltonian, trial, eshift):
@@ -235,3 +309,6 @@ def apply_bound_hybrid(self, ehyb, eshift):  # shift is a number but ehyb is not
     @abstractmethod
     def apply_VHS(self, walkers, hamiltonian, xshifted):
         pass
+
+    def cast_to_cupy(self, verbose=False):
+        cast_to_device(self, verbose=verbose)
diff --git a/ipie/propagation/phaseless_generic.py b/ipie/propagation/phaseless_generic.py
index 5fcb3258..b80092ab 100644
--- a/ipie/propagation/phaseless_generic.py
+++ b/ipie/propagation/phaseless_generic.py
@@ -14,6 +14,7 @@
 
 from ipie.config import config
 from ipie.hamiltonians.generic import GenericComplexChol, GenericRealChol
+from ipie.hamiltonians.generic_chunked import GenericRealCholChunked
 from ipie.hamiltonians.generic_base import GenericBase
 from ipie.propagation.operations import apply_exponential, apply_exponential_batch
 from ipie.propagation.phaseless_base import PhaselessBase
@@ -62,13 +63,14 @@ def construct_VHS(self, hamiltonian: GenericBase, xshifted: xp.ndarray) -> xp.nd
     def construct_VHS(self, hamiltonian: GenericRealChol, xshifted: xp.ndarray) -> xp.ndarray:
         nwalkers = xshifted.shape[-1]
 
-        VHS_packed = hamiltonian.chol_packed.dot(
-            xshifted.real
-        ) + 1.0j * hamiltonian.chol_packed.dot(xshifted.imag)
+        VHS_packed = hamiltonian.chol_packed.dot(xshifted.real).astype(xshifted.dtype)
+        VHS_packed += 1.0j * hamiltonian.chol_packed.dot(
+            xshifted.imag
+        )  # in-place operation reduces GPU memory use
 
         # (nb, nb, nw) -> (nw, nb, nb)
-        VHS_packed = (
-            self.isqrt_dt * VHS_packed.T.reshape(nwalkers, hamiltonian.chol_packed.shape[0]).copy()
+        VHS_packed = self.isqrt_dt * VHS_packed.T.reshape(
+            nwalkers, hamiltonian.chol_packed.shape[0]
         )
 
         VHS = xp.zeros(
@@ -83,6 +85,9 @@ def construct_VHS(self, hamiltonian: GenericRealChol, xshifted: xp.ndarray) -> x
             unpack_VHS_batch_gpu[blockspergrid, threadsperblock](
                 hamiltonian.sym_idx_i, hamiltonian.sym_idx_j, VHS_packed, VHS
             )
+            del VHS_packed
+            xp.cuda.runtime.deviceSynchronize()
+            xp._default_memory_pool.free_all_blocks()
         else:
             unpack_VHS_batch(hamiltonian.sym_idx[0], hamiltonian.sym_idx[1], VHS_packed, VHS)
         return VHS
@@ -112,30 +117,21 @@ def build(self, hamiltonian, trial=None, walkers=None, mpi_handler=None, verbose
         super().build(hamiltonian, trial, walkers, mpi_handler, verbose)
         self.mpi_handler = mpi_handler
 
-    @plum.dispatch.abstract
-    def construct_VHS(self, hamiltonian: GenericBase, xshifted: xp.ndarray) -> xp.ndarray:
-        "abstract function for construct VHS"
-
     @plum.dispatch
-    def construct_VHS(self, hamiltonian: GenericRealChol, xshifted: xp.ndarray) -> xp.ndarray:
+    def construct_VHS(
+        self, hamiltonian: GenericRealCholChunked, xshifted: xp.ndarray
+    ) -> xp.ndarray:
         assert hamiltonian.chunked
-        assert xp.isrealobj(hamiltonian.chol)
-
         nwalkers = xshifted.shape[-1]
 
-        # if hamiltonian.mixed_precision:  # cast it to float
-        #     xshifted = xshifted.astype(numpy.complex64)
-
-        #       xshifted is unique for each processor!
         xshifted_send = xshifted.copy()
         xshifted_recv = xp.zeros_like(xshifted)
 
         idxs = hamiltonian.chol_idxs_chunk
         chol_packed_chunk = hamiltonian.chol_packed_chunk
 
-        VHS_send = chol_packed_chunk.dot(xshifted[idxs, :].real) + 1.0j * chol_packed_chunk.dot(
-            xshifted[idxs, :].imag
-        )
+        VHS_send = chol_packed_chunk.dot(xshifted[idxs, :].real).astype(xshifted.dtype)
+        VHS_send += 1.0j * chol_packed_chunk.dot(xshifted[idxs, :].imag)
         VHS_recv = xp.zeros_like(VHS_send)
 
         srank = self.mpi_handler.scomm.rank
@@ -155,12 +151,10 @@ def construct_VHS(self, hamiltonian: GenericRealChol, xshifted: xp.ndarray) -> x
 
             self.mpi_handler.scomm.barrier()
 
-            # prepare sending
-            VHS_send = (
-                VHS_recv
-                + chol_packed_chunk.dot(xshifted_recv[idxs, :].real)
-                + 1.0j * chol_packed_chunk.dot(xshifted_recv[idxs, :].imag)
-            )
+            VHS_send = chol_packed_chunk.dot(xshifted_recv[idxs, :].real).astype(xshifted.dtype)
+            VHS_send += 1.0j * chol_packed_chunk.dot(xshifted_recv[idxs, :].imag)
+            VHS_send += VHS_recv
+
             xshifted_send = xshifted_recv.copy()
 
         synchronize()
@@ -169,7 +163,7 @@ def construct_VHS(self, hamiltonian: GenericRealChol, xshifted: xp.ndarray) -> x
         req.wait()
         self.mpi_handler.scomm.barrier()
 
-        VHS_recv = self.isqrt_dt * VHS_recv.T.reshape(nwalkers, chol_packed_chunk.shape[0]).copy()
+        VHS_recv = self.isqrt_dt * VHS_recv.T.reshape(nwalkers, chol_packed_chunk.shape[0])
         VHS = xp.zeros(
             (nwalkers, hamiltonian.nbasis, hamiltonian.nbasis),
             dtype=VHS_recv.dtype,
@@ -177,7 +171,8 @@ def construct_VHS(self, hamiltonian: GenericRealChol, xshifted: xp.ndarray) -> x
         # This should be abstracted by kernel import
         if config.get_option("use_gpu"):
             threadsperblock = 512
-            nut = len(hamiltonian.sym_idx_i)
+            nbsf = hamiltonian.nbasis
+            nut = round(nbsf * (nbsf + 1) / 2)
             blockspergrid = math.ceil(nwalkers * nut / threadsperblock)
             unpack_VHS_batch_gpu[blockspergrid, threadsperblock](
                 hamiltonian.sym_idx_i, hamiltonian.sym_idx_j, VHS_recv, VHS
diff --git a/ipie/propagation/propagator.py b/ipie/propagation/propagator.py
index 0200cd11..cc92576b 100644
--- a/ipie/propagation/propagator.py
+++ b/ipie/propagation/propagator.py
@@ -1,4 +1,10 @@
 from ipie.hamiltonians.generic import GenericRealChol, GenericComplexChol
-from ipie.propagation.phaseless_generic import PhaselessGeneric
+from ipie.hamiltonians.generic_chunked import GenericRealCholChunked
+from ipie.propagation.phaseless_generic import PhaselessGeneric, PhaselessGenericChunked
 
-Propagator = {GenericRealChol: PhaselessGeneric, GenericComplexChol: PhaselessGeneric}
+# Propagator = {GenericRealChol: PhaselessGeneric, GenericComplexChol: PhaselessGeneric}
+Propagator = {
+    GenericRealChol: PhaselessGeneric,
+    GenericComplexChol: PhaselessGeneric,
+    GenericRealCholChunked: PhaselessGenericChunked,
+}
diff --git a/ipie/propagation/tests/test_generic.py b/ipie/propagation/tests/test_generic.py
index 731cd3b3..91e71641 100644
--- a/ipie/propagation/tests/test_generic.py
+++ b/ipie/propagation/tests/test_generic.py
@@ -19,17 +19,11 @@
 import numpy
 import pytest
 
-from ipie.estimators.greens_function import (
-    greens_function_single_det_batch,
-)
+from ipie.estimators.greens_function import greens_function_single_det_batch
 from ipie.propagation.overlap import calc_overlap_single_det_uhf
+from ipie.utils.legacy_testing import build_legacy_test_case_handlers
 from ipie.utils.misc import dotdict
-from ipie.utils.testing import (
-    build_test_case_handlers,
-)
-from ipie.utils.legacy_testing import (
-    build_legacy_test_case_handlers,
-)
+from ipie.utils.testing import build_test_case_handlers
 
 
 @pytest.mark.unit
@@ -316,8 +310,10 @@ def test_vhs():
         }
     )
     legacy_data = build_legacy_test_case_handlers(nelec, nmo, num_dets=1, options=qmc, seed=7)
-    xshifted = numpy.random.normal(0.0, 1.0, nwalkers * legacy_data.hamiltonian.nfields).reshape(
-        nwalkers, legacy_data.hamiltonian.nfields
+    xshifted = (
+        numpy.random.normal(0.0, 1.0, nwalkers * legacy_data.hamiltonian.nfields)
+        .reshape(nwalkers, legacy_data.hamiltonian.nfields)
+        .astype(numpy.complex128)
     )
     vhs_serial = []
     for iw in range(nwalkers):
diff --git a/ipie/propagation/tests/test_generic_chunked.py b/ipie/propagation/tests/test_generic_chunked.py
index 5783eb9e..89eab83e 100644
--- a/ipie/propagation/tests/test_generic_chunked.py
+++ b/ipie/propagation/tests/test_generic_chunked.py
@@ -21,6 +21,8 @@
 
 from ipie.config import MPI
 from ipie.hamiltonians.generic import Generic as HamGeneric
+from ipie.hamiltonians.generic_chunked import GenericRealCholChunked
+from ipie.trial_wavefunction.single_det import SingleDet
 from ipie.propagation.force_bias import (
     construct_force_bias_batch_single_det,
     construct_force_bias_batch_single_det_chunked,
@@ -30,7 +32,7 @@
 from ipie.utils.misc import dotdict
 from ipie.utils.mpi import get_shared_array, MPIHandler
 from ipie.utils.pack_numba import pack_cholesky
-from ipie.utils.testing import build_random_single_det_trial, generate_hamiltonian
+from ipie.utils.testing import build_random_single_det_trial, generate_hamiltonian, get_random_nomsd
 from ipie.walkers.walkers_dispatch import UHFWalkersTrial
 
 comm = MPI.COMM_WORLD
@@ -71,17 +73,25 @@ def test_generic_propagation_chunked():
 
     chol = chol.reshape((nmo * nmo, nchol))
 
+    mpi_handler = MPIHandler(nmembers=3, verbose=(rank == 0))
+
     system = Generic(nelec=nelec)
-    ham = HamGeneric(h1e=numpy.array([h1e, h1e]), chol=chol, ecore=enuc)
-    trial, _ = build_random_single_det_trial(nelec, nmo)
+    ham = GenericRealCholChunked(
+        h1e=numpy.array([h1e, h1e]), chol=chol, ecore=enuc, handler=mpi_handler
+    )
+    ham_nochunk = HamGeneric(h1e=numpy.array([h1e, h1e]), chol=chol, ecore=enuc)
+    _, wfn = get_random_nomsd(system.nup, system.ndown, ham.nbasis, ndet=1, cplx=False)
+    trial = SingleDet(wfn[0], nelec, nmo)
+    trial.handler = mpi_handler
+    if comm.rank == 0:
+        print("# Chunking trial.")
     trial.half_rotate(ham)
-    trial.calculate_energy(system, ham)
 
-    qmc = dotdict({"dt": 0.005, "nstblz": 5, "batched": True, "nwalkers": nwalkers})
+    trial_nochunk = SingleDet(wfn[0], nelec, nmo)
+    trial_nochunk.half_rotate(ham_nochunk)
+    trial_nochunk.calculate_energy(system, ham_nochunk)
 
-    mpi_handler = MPIHandler(nmembers=3, verbose=(rank == 0))
-    ham.chunk(mpi_handler)
-    trial.chunk(mpi_handler)
+    qmc = dotdict({"dt": 0.005, "nstblz": 5, "batched": True, "nwalkers": nwalkers})
 
     prop = PhaselessGenericChunked(qmc["dt"])
     prop.build(ham, trial, mpi_handler=mpi_handler)
@@ -94,25 +104,27 @@ def test_generic_propagation_chunked():
     walker_batch.build(trial)
 
     for i in range(nsteps):
-        prop.propagate_walkers(walker_batch, ham, trial, trial.energy)
+        prop.propagate_walkers(walker_batch, ham, trial, trial_nochunk.energy)
         walker_batch.reortho()
 
-    vfb = construct_force_bias_batch_single_det(ham, walker_batch, trial)
+    vfb = construct_force_bias_batch_single_det(ham_nochunk, walker_batch, trial_nochunk)
     vfb_chunked = construct_force_bias_batch_single_det_chunked(
         ham, walker_batch, trial, mpi_handler
     )
 
     assert numpy.allclose(vfb, vfb_chunked)
-    xshifted = numpy.random.normal(0.0, 1.0, ham.nchol * walker_batch.nwalkers).reshape(
-        walker_batch.nwalkers, ham.nchol
+    xshifted = (
+        numpy.random.normal(0.0, 1.0, ham.nchol * walker_batch.nwalkers)
+        .reshape(walker_batch.nwalkers, ham.nchol)
+        .astype(numpy.complex128)
     )
     VHS_chunked = prop.construct_VHS(
         ham,
         xshifted.T.copy(),
     )
     prop = PhaselessGeneric(qmc["dt"])
-    prop.build(ham, trial)
-    VHS = prop.construct_VHS(ham, xshifted.T.copy())
+    prop.build(ham_nochunk, trial_nochunk)
+    VHS = prop.construct_VHS(ham_nochunk, xshifted.T.copy())
     assert numpy.allclose(VHS, VHS_chunked)
 
 
diff --git a/ipie/propagation/tests/test_generic_complex.py b/ipie/propagation/tests/test_generic_complex.py
index c072806e..fcf9af84 100644
--- a/ipie/propagation/tests/test_generic_complex.py
+++ b/ipie/propagation/tests/test_generic_complex.py
@@ -98,7 +98,11 @@ def test_vhs_complex():
     )
 
     ham = test_handler.hamiltonian
-    xshifted = numpy.random.normal(0.0, 1.0, nwalkers * ham.nfields).reshape(ham.nfields, nwalkers)
+    xshifted = (
+        numpy.random.normal(0.0, 1.0, nwalkers * ham.nfields)
+        .reshape(ham.nfields, nwalkers)
+        .astype(numpy.complex128)
+    )
 
     vhs = test_handler.propagator.construct_VHS(ham, xshifted)
 
@@ -145,7 +149,11 @@ def test_vhs_complex_vs_real():
     ham = test_handler.hamiltonian
     chol = ham.chol
 
-    xshifted = numpy.random.normal(0.0, 1.0, nwalkers * ham.nfields).reshape(ham.nfields, nwalkers)
+    xshifted = (
+        numpy.random.normal(0.0, 1.0, nwalkers * ham.nfields)
+        .reshape(ham.nfields, nwalkers)
+        .astype(numpy.complex128)
+    )
 
     vhs = test_handler.propagator.construct_VHS(ham, xshifted)
 
diff --git a/ipie/qmc/afqmc.py b/ipie/qmc/afqmc.py
index 5b9058b3..e71edbc4 100644
--- a/ipie/qmc/afqmc.py
+++ b/ipie/qmc/afqmc.py
@@ -17,6 +17,7 @@
 #
 
 """Driver to perform AFQMC calculation"""
+import abc
 import json
 import time
 import uuid
@@ -41,8 +42,216 @@
 from ipie.walkers.walkers_dispatch import get_initial_walker, UHFWalkersTrial
 
 
-class AFQMC(object):
-    """AFQMC driver.
+class AFQMCBase(metaclass=abc.ABCMeta):
+    """Basic interface for AFQMC Driver"""
+
+    def __init__(
+        self,
+        system,
+        hamiltonian,
+        trial,
+        walkers,
+        propagator,
+        mpi_handler,
+        params: QMCParams,
+        verbose: int = 0,
+    ):
+        self.system = system
+        self.hamiltonian = hamiltonian
+        self.trial = trial
+        self.walkers = walkers
+        self.propagator = propagator
+        self.mpi_handler = mpi_handler  # mpi_handler should be passed into here
+        self.shared_comm = self.mpi_handler.shared_comm
+        self.verbose = verbose
+        self.verbosity = int(verbose)
+        self.params = params
+        self._init_time = time.time()
+        self._parallel_rng_seed = set_rng_seed(params.rng_seed, self.mpi_handler.comm)
+
+    @abc.abstractmethod
+    def run(
+        self,
+        walkers=None,
+        estimator_filename=None,
+        verbose=True,
+        additional_estimators: Optional[Dict[str, EstimatorBase]] = None,
+    ):
+        """Code to run the AFQMC calculation."""
+        raise NotImplementedError
+
+    def distribute_hamiltonian(self):
+        if self.mpi_handler.nmembers > 1:
+            if self.mpi_handler.comm.rank == 0:
+                print("# Chunking hamiltonian.")
+            # self.hamiltonian.chunk(self.mpi_handler)
+            if self.mpi_handler.comm.rank == 0:
+                print("# Chunking trial.")
+            self.trial.chunk(self.mpi_handler)
+
+    def copy_to_gpu(self):
+        comm = self.mpi_handler.comm
+        if config.get_option("use_gpu"):
+            ngpus = xp.cuda.runtime.getDeviceCount()
+            _ = xp.cuda.runtime.getDeviceProperties(0)
+            # xp.cuda.runtime.setDevice(self.shared_comm.rank % 4)
+            xp.cuda.runtime.setDevice(comm.rank % ngpus)
+            if comm.rank == 0:
+                if ngpus > comm.size:
+                    print(
+                        f"# There are unused GPUs ({comm.size} MPI tasks but {ngpus} GPUs). "
+                        " Check if this is really what you wanted."
+                    )
+            self.propagator.cast_to_cupy(self.verbose and comm.rank == 0)
+            self.hamiltonian.cast_to_cupy(self.verbose and comm.rank == 0)
+            self.trial.cast_to_cupy(self.verbose and comm.rank == 0)
+            self.walkers.cast_to_cupy(self.verbose and comm.rank == 0)
+
+    def get_env_info(self):
+        # TODO: Move this somewhere else.
+        this_uuid = str(uuid.uuid1())
+        try:
+            sha1, branch, local_mods = get_git_info()
+        except:
+            sha1 = "None"
+            branch = "None"
+            local_mods = []
+        if self.verbose:
+            self.sys_info = print_env_info(
+                sha1, branch, local_mods, this_uuid, self.mpi_handler.size
+            )
+            mem_avail = get_host_memory()
+            print(f"# MPI communicator : {type(self.mpi_handler.comm)}")
+            print(f"# Available memory on the node is {mem_avail:4.3f} GB")
+
+    def setup_timers(self):
+        # TODO: Better timer
+        self.tsetup = 0
+        self.tortho = 0
+        self.tprop = 0
+
+        self.tprop_fbias = 0.0
+        self.tprop_ovlp = 0.0
+        self.tprop_update = 0.0
+        self.tprop_gf = 0.0
+        self.tprop_vhs = 0.0
+        self.tprop_gemm = 0.0
+        self.tprop_clip = 0.0
+        self.tprop_barrier = 0.0
+
+        self.testim = 0
+        self.tpopc = 0
+        self.tpopc_comm = 0
+        self.tpopc_non_comm = 0
+        self.tstep = 0
+
+    def finalise(self, verbose=False):
+        """Tidy up.
+
+        Parameters
+        ----------
+        verbose : bool
+            If true print out some information to stdout.
+        """
+        nsteps = max(self.params.num_steps_per_block, 1)
+        nblocks = max(self.params.num_blocks, 1)
+        nstblz = max(nsteps // self.params.num_stblz, 1)
+        npcon = max(nsteps // self.params.pop_control_freq, 1)
+        if self.mpi_handler.rank == 0:
+            if verbose:
+                print(f"# End Time: {time.asctime():s}")
+                print(f"# Running time : {time.time() - self._init_time:.6f} seconds")
+                print("# Timing breakdown (per call, total calls per block, total blocks):")
+                print(f"# - Setup: {self.tsetup:.6f} s")
+                print(
+                    "# - Block: {:.6f} s / block for {} total blocks".format(
+                        self.tstep / (nblocks), nblocks
+                    )
+                )
+                print(
+                    "# - Propagation: {:.6f} s / call for {} call(s) in each of {} blocks".format(
+                        self.tprop / (nblocks * nsteps), nsteps, nblocks
+                    )
+                )
+                print(
+                    "#     -       Force bias: {:.6f} s / call for {} call(s) in each of {} blocks".format(
+                        self.tprop_fbias / (nblocks * nsteps), nsteps, nblocks
+                    )
+                )
+                print(
+                    "#     -              VHS: {:.6f} s / call for {} call(s) in each of {} blocks".format(
+                        self.tprop_vhs / (nblocks * nsteps), nsteps, nblocks
+                    )
+                )
+                print(
+                    "#     - Green's Function: {:.6f} s / call for {} call(s) in each of {} blocks".format(
+                        self.tprop_gf / (nblocks * nsteps), nsteps, nblocks
+                    )
+                )
+                print(
+                    "#     -          Overlap: {:.6f} s / call for {} call(s) in each of {} blocks".format(
+                        self.tprop_ovlp / (nblocks * nsteps), nsteps, nblocks
+                    )
+                )
+                print(
+                    "#     -   Weights Update: {:.6f} s / call for {} call(s) in each of {} blocks".format(
+                        (self.tprop_update + self.tprop_clip) / (nblocks * nsteps), nsteps, nblocks
+                    )
+                )
+                print(
+                    "#     -  GEMM operations: {:.6f} s / call for {} call(s) in each of {} blocks".format(
+                        self.tprop_gemm / (nblocks * nsteps), nsteps, nblocks
+                    )
+                )
+                print(
+                    "#     -          Barrier: {:.6f} s / call for {} call(s) in each of {} blocks".format(
+                        self.tprop_barrier / (nblocks * nsteps), nsteps, nblocks
+                    )
+                )
+                print(
+                    "# - Estimators: {:.6f} s / call for {} call(s)".format(
+                        self.testim / nblocks, nblocks
+                    )
+                )
+                print(
+                    "# - Orthogonalisation: {:.6f} s / call for {} call(s) in each of {} blocks".format(
+                        self.tortho / (nstblz * nblocks), nstblz, nblocks
+                    )
+                )
+                print(
+                    "# - Population control: {:.6f} s / call for {} call(s) in each of {} blocks".format(
+                        self.tpopc / (npcon * nblocks), npcon, nblocks
+                    )
+                )
+                print(
+                    "#       -     Commnication: {:.6f} s / call for {} call(s) in each of {} blocks".format(
+                        self.tpopc_comm / (npcon * nblocks), npcon, nblocks
+                    )
+                )
+                print(
+                    "#       - Non-Commnication: {:.6f} s / call for {} call(s) in each of {} blocks".format(
+                        self.tpopc_non_comm / (npcon * nblocks), npcon, nblocks
+                    )
+                )
+
+    def determine_dtype(self, propagator, system):
+        """Determine dtype for trial wavefunction and walkers.
+
+        Parameters
+        ----------
+        propagator : dict
+            Propagator input options.
+        system : object
+            system object.
+        """
+        hs_type = propagator.get("hubbard_stratonovich", "discrete")
+        continuous = "continuous" in hs_type
+        twist = system.ktwist.all() is not None
+        return continuous or twist
+
+
+class AFQMC(AFQMCBase):
+    """AFQMC driver for zero temperature open ended random walk.
 
     Parameters
     ----------
@@ -69,20 +278,19 @@ class AFQMC(object):
     """
 
     def __init__(
-        self, system, hamiltonian, trial, walkers, propagator, params: QMCParams, verbose: int = 0
+        self,
+        system,
+        hamiltonian,
+        trial,
+        walkers,
+        propagator,
+        mpi_handler,
+        params: QMCParams,
+        verbose: int = 0,
     ):
-        self.system = system
-        self.hamiltonian = hamiltonian
-        self.trial = trial
-        self.walkers = walkers
-        self.propagator = propagator
-        self.mpi_handler = MPIHandler()
-        self.shared_comm = self.mpi_handler.shared_comm
-        self.verbose = verbose
-        self.verbosity = int(verbose)
-        self.params = params
-        self._init_time = time.time()
-        self._parallel_rng_seed = set_rng_seed(params.rng_seed, self.mpi_handler.comm)
+        super().__init__(
+            system, hamiltonian, trial, walkers, propagator, mpi_handler, params, verbose
+        )
 
     @staticmethod
     # TODO: wavefunction type, trial type, hamiltonian type
@@ -92,7 +300,7 @@ def build(
         trial_wavefunction,
         walkers=None,
         num_walkers: int = 100,
-        seed: int = None,
+        seed: Optional[int] = None,
         num_steps_per_block: int = 25,
         num_blocks: int = 100,
         timestep: float = 0.005,
@@ -109,7 +317,7 @@ def build(
             Number of alpha and beta electrons.
         hamiltonian :
             Hamiltonian describing the system.
-        trial_wavefunction :
+        trial_wavefunction:
             Trial wavefunction
         num_walkers : int
             Number of walkers per MPI process used in the simulation. The TOTAL
@@ -168,7 +376,7 @@ def build(
             )
             walkers.build(
                 trial_wavefunction
-            )  # any intermediates that require information from trial
+            )  # any intermediates that require information from trial_wavefunction
         # TODO: this is a factory not a class
         propagator = Propagator[type(hamiltonian)](params.timestep)
         propagator.build(hamiltonian, trial_wavefunction, walkers, mpi_handler)
@@ -178,6 +386,7 @@ def build(
             trial_wavefunction,
             walkers,
             propagator,
+            mpi_handler,
             params,
             verbose=(verbose and comm.rank == 0),
         )
@@ -189,13 +398,14 @@ def build_from_hdf5(
         ham_file,
         wfn_file,
         num_walkers: int = 100,
-        seed: int = None,
+        seed: Optional[int] = None,
         num_steps_per_block: int = 25,
         num_blocks: int = 100,
         timestep: float = 0.005,
         stabilize_freq=5,
         pop_control_freq=5,
         num_dets_chunk=1,
+        num_dets_for_trial_props=100,
         pack_cholesky=True,
         verbose=True,
     ) -> "AFQMC":
@@ -228,6 +438,8 @@ def build_from_hdf5(
                 steps.) Default 25.
         num_det_chunks : int
             Size of chunks of determinants to process during batching. Default=1 (no batching).
+        num_dets_for_trial_props: int
+            Number of determinants to use to evaluate trial wavefunction properties.
         pack_cholesky : bool
             Use symmetry to reduce memory consumption of integrals. Default True.
         verbose : bool
@@ -239,7 +451,12 @@ def build_from_hdf5(
             ham_file, mpi_handler.scomm, verbose=_verbose, pack_chol=pack_cholesky
         )
         trial = get_trial_wavefunction(
-            num_elec, ham.nbasis, wfn_file, ndet_chunks=num_dets_chunk, verbose=_verbose
+            num_elec,
+            ham.nbasis,
+            wfn_file,
+            ndet_chunks=num_dets_chunk,
+            ndets_props=num_dets_for_trial_props,
+            verbose=_verbose,
         )
         trial.half_rotate(ham, mpi_handler.scomm)
         return AFQMC.build(
@@ -257,49 +474,6 @@ def build_from_hdf5(
             mpi_handler=mpi_handler,
         )
 
-    def distribute_hamiltonian(self):
-        if self.mpi_handler.nmembers > 1:
-            if self.mpi_handler.comm.rank == 0:
-                print("# Chunking hamiltonian.")
-            self.hamiltonian.chunk(self.mpi_handler)
-            if self.mpi_handler.comm.rank == 0:
-                print("# Chunking trial.")
-            self.trial.chunk(self.mpi_handler)
-
-    def copy_to_gpu(self):
-        comm = self.mpi_handler.comm
-        if config.get_option("use_gpu"):
-            ngpus = xp.cuda.runtime.getDeviceCount()
-            _ = xp.cuda.runtime.getDeviceProperties(0)
-            xp.cuda.runtime.setDevice(self.shared_comm.rank)
-            if comm.rank == 0:
-                if ngpus > comm.size:
-                    print(
-                        f"# There are unused GPUs ({comm.size} MPI tasks but {ngpus} GPUs). "
-                        " Check if this is really what you wanted."
-                    )
-            self.propagator.cast_to_cupy(self.verbose and comm.rank == 0)
-            self.hamiltonian.cast_to_cupy(self.verbose and comm.rank == 0)
-            self.trial.cast_to_cupy(self.verbose and comm.rank == 0)
-            self.walkers.cast_to_cupy(self.verbose and comm.rank == 0)
-
-    def get_env_info(self):
-        # TODO: Move this somewhere else.
-        this_uuid = str(uuid.uuid1())
-        try:
-            sha1, branch, local_mods = get_git_info()
-        except:
-            sha1 = "None"
-            branch = "None"
-            local_mods = []
-        if self.verbose:
-            self.sys_info = print_env_info(
-                sha1, branch, local_mods, this_uuid, self.mpi_handler.size
-            )
-            mem_avail = get_host_memory()
-            print(f"# MPI communicator : {type(self.mpi_handler.comm)}")
-            print(f"# Available memory on the node is {mem_avail:4.3f} GB")
-
     def setup_estimators(
         self, filename, additional_estimators: Optional[Dict[str, EstimatorBase]] = None
     ):
@@ -326,37 +500,14 @@ def setup_estimators(
 
         self.estimators.initialize(comm)
         # Calculate estimates for initial distribution of walkers.
-        self.estimators.compute_estimators(
-            comm, self.system, self.hamiltonian, self.trial, self.walkers
-        )
+        self.estimators.compute_estimators(self.system, self.hamiltonian, self.trial, self.walkers)
         self.accumulators.update(self.walkers)
         self.estimators.print_block(comm, 0, self.accumulators)
         self.accumulators.zero()
 
-    def setup_timers(self):
-        # TODO: Better timer
-        self.tsetup = 0
-        self.tortho = 0
-        self.tprop = 0
-
-        self.tprop_fbias = 0.0
-        self.tprop_ovlp = 0.0
-        self.tprop_update = 0.0
-        self.tprop_gf = 0.0
-        self.tprop_vhs = 0.0
-        self.tprop_gemm = 0.0
-        self.tprop_clip = 0.0
-        self.tprop_barrier = 0.0
-
-        self.testim = 0
-        self.tpopc = 0
-        self.tpopc_comm = 0
-        self.tpopc_non_comm = 0
-        self.tstep = 0
-
     def run(
         self,
-        psi=None,
+        walkers=None,
         estimator_filename=None,
         verbose=True,
         additional_estimators: Optional[Dict[str, EstimatorBase]] = None,
@@ -365,7 +516,7 @@ def run(
 
         Parameters
         ----------
-        psi : :class:`pie.walker.Walkers` object
+        walkers : :class:`pie.walker.Walkers` object
             Initial wavefunction / distribution of walkers. Default None.
         estimator_filename : str
             File to write estimates to.
@@ -374,8 +525,8 @@ def run(
         """
         self.setup_timers()
         tzero_setup = time.time()
-        if psi is not None:
-            self.walkers = psi
+        if walkers is not None:
+            self.walkers = walkers
         self.setup_timers()
         eshift = 0.0
         self.walkers.orthogonalise()
@@ -388,8 +539,12 @@ def run(
         )
 
         self.get_env_info()
+        # self.distribute_hamiltonian()
         self.copy_to_gpu()
-        self.distribute_hamiltonian()
+
+        # from ipie.utils.backend import get_device_memory
+        # used_bytes, total_bytes = get_device_memory()
+        # print(f"# after distribute {comm.rank}: using {used_bytes/1024**3} GB out of {total_bytes/1024**3} GB memory on GPU")
         self.setup_estimators(estimator_filename, additional_estimators=additional_estimators)
 
         # TODO: This magic value of 2 is pretty much never controlled on input.
@@ -457,7 +612,7 @@ def run(
             start = time.time()
             if step % self.params.num_steps_per_block == 0:
                 self.estimators.compute_estimators(
-                    comm, self.system, self.hamiltonian, self.trial, self.walkers
+                    self.system, self.hamiltonian, self.trial, self.walkers
                 )
                 self.estimators.print_block(
                     comm, step // self.params.num_steps_per_block, self.accumulators
@@ -476,107 +631,3 @@ def run(
                 eshift += self.accumulators.eshift - eshift
             synchronize()
             self.tstep += time.time() - start_step
-
-    def finalise(self, verbose=False):
-        """Tidy up.
-
-        Parameters
-        ----------
-        verbose : bool
-            If true print out some information to stdout.
-        """
-        nsteps = max(self.params.num_steps_per_block, 1)
-        nblocks = max(self.params.num_blocks, 1)
-        nstblz = max(nsteps // self.params.num_stblz, 1)
-        npcon = max(nsteps // self.params.pop_control_freq, 1)
-        if self.mpi_handler.rank == 0:
-            if verbose:
-                print(f"# End Time: {time.asctime():s}")
-                print(f"# Running time : {time.time() - self._init_time:.6f} seconds")
-                print("# Timing breakdown (per call, total calls per block, total blocks):")
-                print(f"# - Setup: {self.tsetup:.6f} s")
-                print(
-                    "# - Block: {:.6f} s / block for {} total blocks".format(
-                        self.tstep / (nblocks), nblocks
-                    )
-                )
-                print(
-                    "# - Propagation: {:.6f} s / call for {} call(s) in each of {} blocks".format(
-                        self.tprop / (nblocks * nsteps), nsteps, nblocks
-                    )
-                )
-                print(
-                    "#     -       Force bias: {:.6f} s / call for {} call(s) in each of {} blocks".format(
-                        self.tprop_fbias / (nblocks * nsteps), nsteps, nblocks
-                    )
-                )
-                print(
-                    "#     -              VHS: {:.6f} s / call for {} call(s) in each of {} blocks".format(
-                        self.tprop_vhs / (nblocks * nsteps), nsteps, nblocks
-                    )
-                )
-                print(
-                    "#     - Green's Function: {:.6f} s / call for {} call(s) in each of {} blocks".format(
-                        self.tprop_gf / (nblocks * nsteps), nsteps, nblocks
-                    )
-                )
-                print(
-                    "#     -          Overlap: {:.6f} s / call for {} call(s) in each of {} blocks".format(
-                        self.tprop_ovlp / (nblocks * nsteps), nsteps, nblocks
-                    )
-                )
-                print(
-                    "#     -   Weights Update: {:.6f} s / call for {} call(s) in each of {} blocks".format(
-                        (self.tprop_update + self.tprop_clip) / (nblocks * nsteps), nsteps, nblocks
-                    )
-                )
-                print(
-                    "#     -  GEMM operations: {:.6f} s / call for {} call(s) in each of {} blocks".format(
-                        self.tprop_gemm / (nblocks * nsteps), nsteps, nblocks
-                    )
-                )
-                print(
-                    "#     -          Barrier: {:.6f} s / call for {} call(s) in each of {} blocks".format(
-                        self.tprop_barrier / (nblocks * nsteps), nsteps, nblocks
-                    )
-                )
-                print(
-                    "# - Estimators: {:.6f} s / call for {} call(s)".format(
-                        self.testim / nblocks, nblocks
-                    )
-                )
-                print(
-                    "# - Orthogonalisation: {:.6f} s / call for {} call(s) in each of {} blocks".format(
-                        self.tortho / (nstblz * nblocks), nstblz, nblocks
-                    )
-                )
-                print(
-                    "# - Population control: {:.6f} s / call for {} call(s) in each of {} blocks".format(
-                        self.tpopc / (npcon * nblocks), npcon, nblocks
-                    )
-                )
-                print(
-                    "#       -     Commnication: {:.6f} s / call for {} call(s) in each of {} blocks".format(
-                        self.tpopc_comm / (npcon * nblocks), npcon, nblocks
-                    )
-                )
-                print(
-                    "#       - Non-Commnication: {:.6f} s / call for {} call(s) in each of {} blocks".format(
-                        self.tpopc_non_comm / (npcon * nblocks), npcon, nblocks
-                    )
-                )
-
-    def determine_dtype(self, propagator, system):
-        """Determine dtype for trial wavefunction and walkers.
-
-        Parameters
-        ----------
-        propagator : dict
-            Propagator input options.
-        system : object
-            system object.
-        """
-        hs_type = propagator.get("hubbard_stratonovich", "discrete")
-        continuous = "continuous" in hs_type
-        twist = system.ktwist.all() is not None
-        return continuous or twist
diff --git a/ipie/qmc/calc.py b/ipie/qmc/calc.py
index e51d6f08..fadc7f08 100644
--- a/ipie/qmc/calc.py
+++ b/ipie/qmc/calc.py
@@ -143,6 +143,7 @@ def get_driver(options: dict, comm: MPI.COMM_WORLD) -> AFQMC:
             trial,
             walkers,
             propagator,
+            mpi_handler,
             params,
             verbose=(verbosity and comm.rank == 0),
         )
diff --git a/ipie/qmc/options.py b/ipie/qmc/options.py
index 44c52474..faa9a1a6 100644
--- a/ipie/qmc/options.py
+++ b/ipie/qmc/options.py
@@ -45,60 +45,32 @@ class QMCOpts(object):
     Initialised from a dict containing the following options, not all of which
     are required.
 
-    Parameters
+    Attributes
     ----------
-    method : string
-        Which auxiliary field method are we using? Currently only CPMC is
-        implemented.
+    batched : bool
+        Whether to do batched calculations.
     nwalkers : int
         Number of walkers to propagate in a simulation.
     dt : float
         Timestep.
     nsteps : int
         Number of steps per block.
-    nmeasure : int
-        Frequency of energy measurements.
+    nblocks : int
+        Number of blocks. Total number of iterations = nblocks * nsteps.
     nstblz : int
         Frequency of Gram-Schmidt orthogonalisation steps.
     npop_control : int
         Frequency of population control.
-    temp : float
-        Temperature. Currently not used.
-    nequilibrate : int
-        Number of steps used for equilibration phase. Only used to fix local
+    pop_control_method : str
+        Population control method.
+    eqlb_time : float
+        Time scale of equilibration phase. Only used to fix local
         energy bound when using phaseless approximation.
-    importance_sampling : boolean
-        Are we using importance sampling. Default True.
-    hubbard_statonovich : string
-        Which hubbard stratonovich transformation are we using. Currently the
-        options are:
-
-        - discrete : Use the discrete Hirsch spin transformation.
-        - opt_continuous : Use the continuous transformation for the Hubbard
-          model.
-        - generic : Use the generic transformation. To be used with Generic
-          system class.
-
-    ffts : boolean
-        Use FFTS to diagonalise the kinetic energy propagator? Default False.
-        This may speed things up for larger lattices.
-
-    Attributes
-    ----------
-    cplx : boolean
-        Do we require complex wavefunctions?
-    mf_shift : float
-        Mean field shift for continuous Hubbard-Stratonovich transformation.
-    iut_fac : complex float
-        Stores i*(U*dt)**0.5 for continuous Hubbard-Stratonovich transformation.
-    ut_fac : float
-        Stores (U*dt) for continuous Hubbard-Stratonovich transformation.
-    mf_nsq : float
-        Stores M * mf_shift for continuous Hubbard-Stratonovich transformation.
-    local_energy_bound : float
-        Energy pound for continuous Hubbard-Stratonovich transformation.
-    mean_local_energy : float
-        Estimate for mean energy for continuous Hubbard-Stratonovich transformation.
+    neqlb : int
+        Number of time steps for the equilibration phase. Only used to fix the
+        local energy bound when using phaseless approximation.
+    rng_seed : int
+        The random number seed.
     """
 
     # pylint: disable=dangerous-default-value
@@ -133,6 +105,13 @@ def __init__(self, inputs={}, verbose=False):
             alias=["npop_control", "pop_control"],
             verbose=verbose,
         )
+        self.pop_control_method = get_input_value(
+            inputs,
+            "pop_control_method",
+            default="pair_branch",
+            alias=["pop_control", "population_control"],
+            verbose=verbose,
+        )
         self.eqlb_time = get_input_value(
             inputs,
             "equilibration_time",
@@ -160,19 +139,27 @@ def __str__(self, verbose=0):
 class QMCParams:
     r"""Input options and certain constants / parameters derived from them.
 
-    Args:
-        num_walkers: number of walkers **per** core / task / computational unit.
-        total_num_walkers: The total number of walkers in the simulation.
-        timestep: The timestep delta_t
-        num_steps_per_block: Number of steps of propagation before estimators
-            are evaluated.
-        num_blocks: The number of blocks. Total number of iterations =
-            num_blocks * num_steps_per_block.
-        num_stblz: number of steps before QR stabilization of walkers is performed.
-        pop_control_freq: Frequency at which population control occurs.
-        rng_seed: The random number seed. If run in parallel the seeds on other
-            cores / threads are determined from this.
+    Attributes
+    ----------
+    num_walkers : int
+        Number of walkers **per** core / task / computational unit.
+    total_num_walkers : int
+        The total number of walkers in the simulation.
+    timestep : float
+        The timestep delta_t
+    num_steps_per_block : int
+        Number of steps of propagation before estimators are evaluated.
+    num_blocks : int
+        Number of blocks. Total number of iterations = num_blocks * num_steps_per_block.
+    num_stblz : int
+        Number of steps before QR stabilization of walkers is performed.
+    pop_control_freq : int
+        Frequency at which population control occurs.
+    rng_seed : int
+        The random number seed. If run in parallel the seeds on other cores /
+        threads are determined from this.
     """
+
     num_walkers: int
     total_num_walkers: int
     timestep: float
diff --git a/ipie/qmc/tests/reference_data/ft_ueg_ecut1.0_rs1.0/reference.json b/ipie/qmc/tests/reference_data/ft_ueg_ecut1.0_rs1.0/reference.json
index c278179f..d1cfd38b 100644
--- a/ipie/qmc/tests/reference_data/ft_ueg_ecut1.0_rs1.0/reference.json
+++ b/ipie/qmc/tests/reference_data/ft_ueg_ecut1.0_rs1.0/reference.json
@@ -1 +1 @@
-{"WeightFactor": [32.0, 47.94831398616111], "Weight": [32.0, 31.999999999999996], "ENumer": [853.4128425513718, 986.7912119123343], "EDenom": [32.0, 31.999999999999996], "ETotal": [26.66915132973037, 30.837225372260455], "E1Body": [28.374994808285745, 33.21707786343164], "E2Body": [-1.705843478555375, -2.379852491171182], "EHybrid": [0.0, 0.0], "Overlap": [1.0, 1.0], "Nav": [14.000000381209672, 16.375812925843057], "sys_info": {"nranks": 4, "branch": "hubbard_updates", "sha1": "618262bc7511a252e2f2bb3f23cc96fa4e8b9eb5", "numpy": {"version": "1.18.4", "path": "/usr/local/lib/python3.7/site-packages/numpy", "BLAS": {"lib": "openblas openblas", "path": "/usr/local/lib"}}, "scipy": {"version": "1.4.1", "path": "/usr/local/lib/python3.7/site-packages/scipy"}, "h5py": {"version": "2.10.0", "path": "/usr/local/lib/python3.7/site-packages/h5py"}, "mpi4py": {"version": "3.0.1", "path": "/usr/local/lib/python3.7/site-packages/mpi4py", "mpicc": "/usr/local/bin/mpicc"}}}
\ No newline at end of file
+{"WeightFactor": [32.0, 47.947639102045635], "Weight": [32.0, 31.999999999999993], "ENumer": [853.4128425513718, 986.7978362646822], "EDenom": [32.0, 31.999999999999993], "ETotal": [26.66915132973037, 30.837432383271327], "E1Body": [28.374994808285745, 33.217171356971804], "E2Body": [-1.705843478555375, -2.379738973700476], "EHybrid": [0.0, 0.0], "Overlap": [1.0, 1.0], "Nav": [14.000000381209672, 16.37587194751124]}
\ No newline at end of file
diff --git a/ipie/qmc/tests/test_afqmc_multi_det_batch.py b/ipie/qmc/tests/test_afqmc_multi_det_batch.py
index f37d4320..b9f0aae6 100644
--- a/ipie/qmc/tests/test_afqmc_multi_det_batch.py
+++ b/ipie/qmc/tests/test_afqmc_multi_det_batch.py
@@ -75,7 +75,7 @@ def test_generic_multi_det_batch():
         afqmc.run(verbose=0, estimator_filename=tmpf.name)
         afqmc.finalise(verbose=0)
         afqmc.estimators.compute_estimators(
-            comm, afqmc.system, afqmc.hamiltonian, afqmc.trial, afqmc.walkers
+            afqmc.system, afqmc.hamiltonian, afqmc.trial, afqmc.walkers
         )
         numer_batch = afqmc.estimators["energy"]["ENumer"]
         denom_batch = afqmc.estimators["energy"]["EDenom"]
diff --git a/ipie/qmc/tests/test_afqmc_single_det_batch.py b/ipie/qmc/tests/test_afqmc_single_det_batch.py
index 8a426314..c11317dd 100644
--- a/ipie/qmc/tests/test_afqmc_single_det_batch.py
+++ b/ipie/qmc/tests/test_afqmc_single_det_batch.py
@@ -78,7 +78,7 @@ def test_generic_single_det_batch():
         afqmc.run(verbose=False, estimator_filename=tmpf.name)
         afqmc.finalise(verbose=0)
         afqmc.estimators.compute_estimators(
-            comm, afqmc.system, afqmc.hamiltonian, afqmc.trial, afqmc.walkers
+            afqmc.system, afqmc.hamiltonian, afqmc.trial, afqmc.walkers
         )
         numer_batch = afqmc.estimators["energy"]["ENumer"]
         denom_batch = afqmc.estimators["energy"]["EDenom"]
@@ -165,7 +165,7 @@ def test_generic_single_det_batch_density_diff():
         afqmc.run(verbose=False, estimator_filename=tmpf.name)
         afqmc.finalise(verbose=0)
         afqmc.estimators.compute_estimators(
-            comm, afqmc.system, afqmc.hamiltonian, afqmc.trial, afqmc.walkers
+            afqmc.system, afqmc.hamiltonian, afqmc.trial, afqmc.walkers
         )
 
         numer_batch = afqmc.estimators["energy"]["ENumer"]
diff --git a/ipie/systems/utils.py b/ipie/systems/utils.py
index d1fe1912..6822f2c4 100644
--- a/ipie/systems/utils.py
+++ b/ipie/systems/utils.py
@@ -44,11 +44,12 @@ def get_system(sys_opts=None, verbose=0, comm=None):
             if comm.rank == 0:
                 print("# Error: Number of electrons not specified.")
                 sys.exit()
+
         nelec = (nup, ndown)
         system = Generic(nelec, verbose)
     else:
         if comm.rank == 0:
             print(f"# Error: unrecognized system name {sys_type}.")
-        raise ValueError
 
+        raise ValueError
     return system
diff --git a/ipie/trial_wavefunction/half_rotate.py b/ipie/trial_wavefunction/half_rotate.py
index fc0494fc..7365561a 100644
--- a/ipie/trial_wavefunction/half_rotate.py
+++ b/ipie/trial_wavefunction/half_rotate.py
@@ -3,6 +3,7 @@
 import numpy as np
 
 from ipie.hamiltonians.generic import Generic, GenericComplexChol, GenericRealChol
+from ipie.hamiltonians.generic_chunked import GenericRealCholChunked
 from ipie.trial_wavefunction.wavefunction_base import TrialWavefunctionBase
 from ipie.utils.mpi import get_shared_array
 
@@ -113,3 +114,97 @@ def half_rotate_generic(
 
     # storing intermediates for correlation energy
     return (rH1a, rH1b), (rchola, rcholb)
+
+
+def half_rotate_chunked(  # half-rotate H1 and the local Cholesky chunk by the trial orbitals
+    trial: TrialWavefunctionBase,  # must expose .handler and .verbose
+    hamiltonian: Generic,  # only GenericRealCholChunked is handled below
+    comm,  # MPI communicator, or None to skip the barrier
+    orbsa: np.ndarray,  # alpha orbitals: 3-D, leading axis of length ndets (asserted)
+    orbsb: np.ndarray,  # beta orbitals: 3-D, leading axis of length ndets (asserted)
+    ndets: int = 1,
+    verbose: bool = False,
+) -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]:  # (rH1a, rH1b), (rchola, rcholb)
+    handler = trial.handler  # only handler.srank is used, to index chunk_displacements
+    if verbose:
+        print("# Constructing half rotated Cholesky vectors.")
+    assert len(orbsa.shape) == 3
+    assert len(orbsb.shape) == 3
+    assert orbsa.shape[0] == ndets
+    assert orbsb.shape[0] == ndets
+    M = hamiltonian.nbasis
+    nchol = hamiltonian.nchol
+    na = orbsa.shape[-1]  # size of the last alpha-orbital axis
+    nb = orbsb.shape[-1]  # size of the last beta-orbital axis
+    if trial.verbose:
+        print(f"# Shape of alpha half-rotated Cholesky: {ndets, nchol, na * M}")
+        print(f"# Shape of beta half-rotated Cholesky: {ndets, nchol, nb * M}")
+
+    chol_chunk = hamiltonian.chol_chunk.reshape((M, M, -1))  # last axis inferred by reshape
+    ctype = hamiltonian.chol_chunk.dtype
+    ptype = orbsa.dtype
+    integral_type = ctype if ctype.itemsize > ptype.itemsize else ptype  # wider itemsize wins
+    if isinstance(hamiltonian, GenericComplexChol) or isinstance(hamiltonian, GenericRealChol):
+        raise NotImplementedError
+    elif isinstance(hamiltonian, GenericRealCholChunked):  # NOTE(review): no else; other types leave buffers unbound
+        rchola_chunk = [np.zeros((ndets, hamiltonian.nchol_chunk, (M * na)), dtype=integral_type)]
+        rcholb_chunk = [np.zeros((ndets, hamiltonian.nchol_chunk, (M * nb)), dtype=integral_type)]
+    rH1a = np.einsum("Jpi,pq->Jiq", orbsa.conj(), hamiltonian.H1[0], optimize=True)
+    rH1b = np.einsum("Jpi,pq->Jiq", orbsb.conj(), hamiltonian.H1[1], optimize=True)
+
+    if verbose:
+        print("# Half-Rotating Cholesky for determinant.")
+    # Column offsets into the output buffers; both remain 0 throughout this function.
+    start_a = 0  # determinant loops
+    start_b = 0
+    compute = True
+    # NOTE(review): start_n/end_n set in this if/else are overwritten below; only `compute` survives.
+    if comm is not None:
+        nwork_per_thread = hamiltonian.nchol // comm.size
+        if nwork_per_thread == 0:
+            start_n = 0
+            end_n = nchol
+            if comm.rank != 0:
+                # Just run on root processor if problem too small.
+                compute = False
+        else:
+            start_n = comm.rank * nwork_per_thread  # Cholesky work split
+            end_n = (comm.rank + 1) * nwork_per_thread
+            if comm.rank == comm.size - 1:
+                end_n = nchol
+    else:
+        start_n = 0
+        end_n = hamiltonian.nchol
+
+    start_n = hamiltonian.chunk_displacements[handler.srank]  # replaces the bounds computed above
+    end_n = hamiltonian.chunk_displacements[handler.srank + 1]
+
+    nchol_loc = end_n - start_n  # used only as the middle dimension of the reshapes below
+    if compute:  # when False the zero-initialised buffers are returned unchanged
+        # Investigate whether these einsums are fast in the future
+        rup = np.einsum(
+            "Jmi,mnx->Jxin",
+            orbsa.conj(),
+            chol_chunk,
+            optimize=True,
+        )
+        rup = rup.reshape((ndets, nchol_loc, na * M))
+        rdn = np.einsum(
+            "Jmi,mnx->Jxin",
+            orbsb.conj(),
+            chol_chunk,
+            optimize=True,
+        )
+        rdn = rdn.reshape((ndets, nchol_loc, nb * M))
+        rchola_chunk[0][:, :, start_a : start_a + M * na] = rup[:]
+        rcholb_chunk[0][:, :, start_b : start_b + M * nb] = rdn[:]
+
+    if comm is not None:
+        comm.barrier()
+
+    if isinstance(hamiltonian, GenericRealCholChunked):
+        rchola = rchola_chunk[0]  # unwrap the single-element list
+        rcholb = rcholb_chunk[0]
+
+    # storing intermediates for correlation energy
+    return (rH1a, rH1b), (rchola, rcholb)
diff --git a/ipie/trial_wavefunction/noci.py b/ipie/trial_wavefunction/noci.py
index b369fef6..c18c91d1 100644
--- a/ipie/trial_wavefunction/noci.py
+++ b/ipie/trial_wavefunction/noci.py
@@ -31,8 +31,7 @@ def __init__(self, wavefunction, num_elec, num_basis, verbose=False):
         self.psib = self.psi[:, :, self.nalpha :]
         self.G = self.build_one_rdm()
 
-    def build(self) -> None:
-        ...
+    def build(self) -> None: ...
 
     def calculate_energy(self, system, hamiltonian):
         return variational_energy_noci(system, hamiltonian, self)
diff --git a/ipie/trial_wavefunction/particle_hole.py b/ipie/trial_wavefunction/particle_hole.py
index fd891b24..cb099e15 100644
--- a/ipie/trial_wavefunction/particle_hole.py
+++ b/ipie/trial_wavefunction/particle_hole.py
@@ -49,9 +49,11 @@ def __init__(
         self.setup_basic_wavefunction(
             wfn, num_dets=num_dets_for_trial, use_active_space=use_active_space
         )
-        self._num_dets_for_props = num_dets_for_props
         self._num_dets = len(self.coeffs)
-        self._num_dets_for_props = num_dets_for_props
+        self._num_dets_for_props = (
+            self._num_dets if num_dets_for_props == -1 else num_dets_for_props
+        )
+        self._num_dets_for_props = min(self._num_dets, self._num_dets_for_props)
         self._num_dets_for_trial = num_dets_for_trial
         self._num_det_chunks = num_det_chunks
         self.ortho_expansion = True
@@ -84,9 +86,11 @@ def setup_basic_wavefunction(self, wfn, num_dets=None, use_active_space=True):
             core = [i for i in range(num_melting)]
             occa = [np.array(core + [o + num_melting for o in oa]) for oa in occa0]
             occb = [np.array(core + [o + num_melting for o in ob]) for ob in occb0]
+            self.nmelting = nmelting_a
         else:
             occa = wfn[1][:num_dets]
             occb = wfn[2][:num_dets]
+            self.nmelting = 0
         # Store alpha electrons first followed by beta electrons.
         # FDM Remove this with wicks helper proper integration
         dets = [list(a) + [i + self.nbasis for i in c] for (a, c) in zip(occa, occb)]
@@ -416,6 +420,12 @@ def build_one_rdm(self):
             if self.verbose:
                 print("# Using Wicks helper to compute 1-RDM.")
             assert wicks_helper is not None
+            max_orb = max(np.max(self.occa), np.max(self.occb))
+            err_msg = (
+                f"Number of orbitals is too large for wicks_helper {max_orb} "
+                f"vs {64*wicks_helper.DET_LEN}."
+            )
+            assert 2 * max_orb < 64 * wicks_helper.DET_LEN, err_msg
             dets = wicks_helper.encode_dets(self.occa, self.occb)
             phases = wicks_helper.convert_phase(self.occa, self.occb)
             _keep = self.num_dets_for_props
@@ -430,6 +440,11 @@ def build_one_rdm(self):
                 print(f"# Time to compute 1-RDM: {end - start} s")
         else:
             self.G = self.compute_1rdm(self.nbasis)
+        tr_g = self.G[0].trace() + self.G[1].trace()
+        err_msg = f"Tr(G_T) is incorrect {tr_g} vs {self.nalpha + self.nbeta}"
+        assert np.isclose(tr_g, self.nalpha + self.nbeta), err_msg
+        if self.verbose:
+            print(f"# Tr(G_T): {tr_g}")
 
     def calc_greens_function(self, walkers) -> np.ndarray:
         return greens_function_multi_det_wicks_opt(walkers, self)
@@ -442,7 +457,9 @@ def calc_force_bias(self, hamiltonian, walkers, mpi_handler=None) -> np.ndarray:
 
     def compute_1rdm(self, nbasis):
         assert self.ortho_expansion == True
-        denom = np.sum(self.coeffs.conj() * self.coeffs)
+        denom = np.sum(
+            self.coeffs[: self.num_dets_for_props].conj() * self.coeffs[: self.num_dets_for_props]
+        )
         Pa = np.zeros((nbasis, nbasis), dtype=np.complex128)
         Pb = np.zeros((nbasis, nbasis), dtype=np.complex128)
         P = [Pa, Pb]
@@ -457,13 +474,13 @@ def map_orb(orb, nbasis):
                 ix = orb - nbasis
             return ix, s
 
-        for idet in range(self.num_dets):
+        for idet in range(self.num_dets_for_props):
             di = self.spin_occs[idet]
             # zero excitation case
             for iorb in range(len(di)):
                 ii, spin_ii = map_orb(di[iorb], nbasis)
                 P[spin_ii][ii, ii] += self.coeffs[idet].conj() * self.coeffs[idet]
-            for jdet in range(idet + 1, self.num_dets):
+            for jdet in range(idet + 1, self.num_dets_for_props):
                 dj = self.spin_occs[jdet]
                 from_orb = list(set(dj) - set(di))
                 to_orb = list(set(di) - set(dj))
diff --git a/ipie/trial_wavefunction/single_det.py b/ipie/trial_wavefunction/single_det.py
index cf348a36..abc043fd 100644
--- a/ipie/trial_wavefunction/single_det.py
+++ b/ipie/trial_wavefunction/single_det.py
@@ -12,21 +12,23 @@
 )
 from ipie.estimators.utils import gab_spin
 from ipie.hamiltonians.generic import GenericComplexChol, GenericRealChol
+from ipie.hamiltonians.generic_chunked import GenericRealCholChunked
 from ipie.propagation.force_bias import (
     construct_force_bias_batch_single_det,
     construct_force_bias_batch_single_det_chunked,
 )
 from ipie.propagation.overlap import calc_overlap_single_det_uhf
-from ipie.trial_wavefunction.half_rotate import half_rotate_generic
+from ipie.trial_wavefunction.half_rotate import half_rotate_generic, half_rotate_chunked
 from ipie.trial_wavefunction.wavefunction_base import TrialWavefunctionBase
 from ipie.utils.backend import arraylib as xp
 from ipie.utils.mpi import MPIHandler
 from ipie.walkers.uhf_walkers import UHFWalkers
+from typing import Union
 
 
 # class for UHF trial
 class SingleDet(TrialWavefunctionBase):
-    def __init__(self, wavefunction, num_elec, num_basis, verbose=False):
+    def __init__(self, wavefunction, num_elec, num_basis, handler=MPIHandler(), verbose=False):
         assert isinstance(wavefunction, numpy.ndarray)
         assert len(wavefunction.shape) == 2
         super().__init__(wavefunction, num_elec, num_basis, verbose=verbose)
@@ -44,6 +46,10 @@ def __init__(self, wavefunction, num_elec, num_basis, verbose=False):
         self.psi0a = self.psi[:, : self.nalpha]
         self.psi0b = self.psi[:, self.nalpha :]
         self.G, self.Ghalf = gab_spin(self.psi, self.psi, self.nalpha, self.nbeta)
+        self.handler = handler
+
+        self.psi0a = numpy.ascontiguousarray(self.psi0a)
+        self.psi0b = numpy.ascontiguousarray(self.psi0b)
 
     def build(self) -> None:
         pass
@@ -104,6 +110,37 @@ def half_rotate(
         self._rcholb = rot_chol[1][0]
         self.half_rotated = True
 
+    @plum.dispatch
+    def half_rotate(
+        self: "SingleDet",
+        hamiltonian: GenericRealCholChunked,
+        comm: Optional[CommType] = MPI.COMM_WORLD,
+    ):
+        """Half-rotate a chunked real-Cholesky Hamiltonian for this single determinant.
+
+        Delegates to ``half_rotate_chunked`` with ``ndets=1`` and stores the rotated
+        one-body terms and rotated Cholesky chunks on ``self``; sets ``half_rotated``.
+        """
+        num_dets = 1
+        orbsa = self.psi0a.reshape((num_dets, self.nbasis, self.nalpha))
+        orbsb = self.psi0b.reshape((num_dets, self.nbasis, self.nbeta))
+        rot_1body, rot_chol = half_rotate_chunked(
+            self,
+            hamiltonian,
+            comm,
+            orbsa,
+            orbsb,
+            ndets=num_dets,
+            verbose=self.verbose,
+        )
+        # Single determinant functions do not expect determinant index, so just
+        # grab zeroth element.
+        self._rH1a = rot_1body[0][0]
+        self._rH1b = rot_1body[1][0]
+        self._rchola_chunk = rot_chol[0][0]
+        self._rcholb_chunk = rot_chol[1][0]
+        self.half_rotated = True
+
     @plum.dispatch
     def half_rotate(
         self: "SingleDet",
@@ -148,10 +185,10 @@ def calc_greens_function(self, walkers, build_full: bool = False) -> numpy.ndarr
     @plum.dispatch
     def calc_force_bias(
         self,
-        hamiltonian: GenericRealChol,
+        hamiltonian: Union[GenericRealChol, GenericRealCholChunked],
         walkers: UHFWalkers,
         mpi_handler: MPIHandler,
-    ) -> numpy.ndarray:
+    ) -> xp.ndarray:
         if hamiltonian.chunked:
             return construct_force_bias_batch_single_det_chunked(
                 hamiltonian, walkers, self, mpi_handler
diff --git a/ipie/trial_wavefunction/tests/__init__.py b/ipie/trial_wavefunction/tests/__init__.py
new file mode 100644
index 00000000..c70e5c37
--- /dev/null
+++ b/ipie/trial_wavefunction/tests/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2022 The ipie Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Fionn Malone <fionn.malone@gmail.com>
+#          Joonho Lee
+#
diff --git a/ipie/trial_wavefunction/tests/test_noci.py b/ipie/trial_wavefunction/tests/test_noci.py
index 1db2004b..a8a96c12 100644
--- a/ipie/trial_wavefunction/tests/test_noci.py
+++ b/ipie/trial_wavefunction/tests/test_noci.py
@@ -33,3 +33,7 @@ def test_noci():
     assert trial._rH1a.shape == (trial.num_dets, nalpha, nbasis)
     assert trial._rH1b.shape == (trial.num_dets, nbeta, nbasis)
     trial.calculate_energy(sys, ham)
+
+
+if __name__ == "__main__":
+    test_noci()
diff --git a/ipie/trial_wavefunction/tests/test_particle_hole.py b/ipie/trial_wavefunction/tests/test_particle_hole.py
index 3acbccfc..d10021b3 100644
--- a/ipie/trial_wavefunction/tests/test_particle_hole.py
+++ b/ipie/trial_wavefunction/tests/test_particle_hole.py
@@ -2,6 +2,13 @@
 import pytest
 
 from ipie.config import MPI
+
+try:
+    from ipie.lib.wicks import wicks_helper
+
+    no_wicks = False
+except ImportError:
+    no_wicks = True
 from ipie.trial_wavefunction.particle_hole import (
     ParticleHole,
     ParticleHoleNonChunked,
@@ -30,6 +37,7 @@ def test_wicks_slow():
     assert len(trial.phase_a) == trial.num_dets
     assert len(trial.phase_b) == trial.num_dets
     trial.num_dets = 10
+    trial.num_dets_for_props = 10
     trial.build()
     assert len(trial.cre_a) == trial.num_dets
     assert len(trial.cre_b) == trial.num_dets
@@ -147,3 +155,35 @@ def test_wicks_opt_chunked():
     assert trial._rH1b.shape == (nbeta, nbasis)
     assert trial._rchola_act.shape == (naux, nbasis * trial.nact)
     assert trial._rcholb_act.shape == (naux, nbasis * trial.nact)
+
+
+@pytest.mark.wicks
+@pytest.mark.skipif(no_wicks, reason="lib.wicks not found.")
+@pytest.mark.parametrize(
+    "nalpha,nbeta,nbasis", ((4, 4, 10), (4, 7, 12), (36, 36, 47), (64, 65, 72))
+)
+def test_opt_one_rdm(nalpha, nbeta, nbasis):
+    wavefunction, _ = get_random_phmsd_opt(nalpha, nbeta, nbasis, ndet=100, cmplx_coeffs=False)
+    trial = ParticleHole(
+        wavefunction,
+        (nalpha, nbeta),
+        nbasis,
+        verbose=False,
+        num_dets_for_props=len(wavefunction[0]),
+    )
+    ref = trial.compute_1rdm(nbasis)
+    assert np.allclose(trial.G, ref)
+    wavefunction, _ = get_random_phmsd_opt(
+        nalpha, nbeta, nbasis, ndet=len(wavefunction[0]), cmplx_coeffs=True
+    )
+    trial = ParticleHole(
+        wavefunction,
+        (nalpha, nbeta),
+        nbasis,
+        verbose=False,
+        num_dets_for_props=(len(wavefunction[0])),
+    )
+    ref = trial.compute_1rdm(nbasis)
+    # TODO: Fix convention; the complex-coefficient G is compared against ref transposed.
+    assert np.allclose(trial.G[0], ref[0].T)
+    assert np.allclose(trial.G[1], ref[1].T)
diff --git a/ipie/trial_wavefunction/tests/test_wavefunction_base.py b/ipie/trial_wavefunction/tests/test_wavefunction_base.py
index bd7d168f..43b6f20b 100644
--- a/ipie/trial_wavefunction/tests/test_wavefunction_base.py
+++ b/ipie/trial_wavefunction/tests/test_wavefunction_base.py
@@ -16,3 +16,7 @@ def test_wavefunction_base():
     )
     assert trial.nelec == (nalpha, nbeta)
     assert trial.nbasis == num_basis
+
+
+if __name__ == "__main__":
+    test_wavefunction_base()
diff --git a/ipie/trial_wavefunction/utils.py b/ipie/trial_wavefunction/utils.py
index ac12aa85..05def0e9 100644
--- a/ipie/trial_wavefunction/utils.py
+++ b/ipie/trial_wavefunction/utils.py
@@ -38,7 +38,7 @@ def get_trial_wavefunction(
     nbasis: int,
     wfn_file: str,
     ndets: int = -1,
-    ndets_props: int = -1,
+    ndets_props: int = 100,
     ndet_chunks: int = 1,
     verbose=False,
 ):
@@ -56,7 +56,6 @@ def get_trial_wavefunction(
     trial : class or None
         Trial wavfunction class.
     """
-    assert ndets_props <= ndets
     wfn_type = determine_wavefunction_type(wfn_file)
     if wfn_type == "particle_hole":
         wfn, _ = read_particle_hole_wavefunction(wfn_file)
@@ -103,7 +102,6 @@ def get_trial_wavefunction(
         trial = setup_qmcpack_wavefunction(wfn_file, ndets, ndets_props, ndet_chunks)
     else:
         raise RuntimeError("Unknown wavefunction type")
-    trial.build()
 
     return trial
 
diff --git a/ipie/trial_wavefunction/wavefunction_base.py b/ipie/trial_wavefunction/wavefunction_base.py
index f1eb5531..01c79500 100644
--- a/ipie/trial_wavefunction/wavefunction_base.py
+++ b/ipie/trial_wavefunction/wavefunction_base.py
@@ -42,12 +42,11 @@ def __init__(
         self.e2b = None
         self.energy = None
 
-    def cast_to_cupy(self) -> None:
-        cast_to_device(self, self.verbose)
+    def cast_to_cupy(self, verbose=False):
+        cast_to_device(self, verbose=verbose)
 
     @abstractmethod
-    def build(self) -> None:
-        ...
+    def build(self) -> None: ...
 
     @property
     def num_dets(self) -> int:
@@ -71,16 +70,13 @@ def half_rotated(self, is_half_rotated) -> None:
         self._half_rotated = is_half_rotated
 
     @abstractmethod
-    def half_rotate(self, hamiltonian, comm: Optional[CommType] = MPI.COMM_WORLD) -> None:
-        ...
+    def half_rotate(self, hamiltonian, comm: Optional[CommType] = MPI.COMM_WORLD) -> None: ...
 
     @abstractmethod
-    def calc_overlap(self, walkers) -> np.ndarray:
-        ...
+    def calc_overlap(self, walkers) -> np.ndarray: ...
 
     @abstractmethod
-    def calc_greens_function(self, walkers) -> np.ndarray:
-        ...
+    def calc_greens_function(self, walkers) -> np.ndarray: ...
 
     @abstractmethod
     def calc_force_bias(self, hamiltonian, walkers, mpi_handler) -> np.ndarray:
@@ -100,3 +96,6 @@ def chunk(self, handler):
         assert self._rchola.size == tot_size
         tot_size = handler.allreduce_group(self._rcholb_chunk.size)
         assert self._rcholb.size == tot_size
+
+        del self._rchola
+        del self._rcholb
diff --git a/ipie/utils/backend.py b/ipie/utils/backend.py
index 0b1da689..828922f9 100644
--- a/ipie/utils/backend.py
+++ b/ipie/utils/backend.py
@@ -93,8 +93,9 @@ def cast_to_device(self, verbose=False):
         expected_bytes = size * 16.0
         expected_gb = expected_bytes / 1024.0**3.0
         print(f"# {self.__class__.__name__}: expected to allocate {expected_gb} GB")
-
     for k, v in self.__dict__.items():
+        if k in ["Ga", "Gb"]:
+            continue  # reduce mem usage, Ga/Gb not used, use Ghalf instead
         if isinstance(v, _np.ndarray):
             self.__dict__[k] = arraylib.array(v)
         elif isinstance(v, list) and isinstance(v[0], _np.ndarray):
diff --git a/ipie/utils/chunk_large_chol.py b/ipie/utils/chunk_large_chol.py
new file mode 100644
index 00000000..325fd9f5
--- /dev/null
+++ b/ipie/utils/chunk_large_chol.py
@@ -0,0 +1,37 @@
+from ipie.utils.mpi import make_splits_displacements
+import h5py
+import numpy as np
+
+
+def split_cholesky(ham_filename: str, nmembers: int, verbose: bool = True) -> None:
+    """Split Cholesky vectors from an HDF5 file into one ``chol_<i>.h5`` file per member.
+
+    Dataset "LXmn" is flattened to (nbas * nbas, naux) and sliced along naux; each member's
+    slice (e.g. one per GPU card) is written as dataset "chol" in the current directory.
+
+    Parameters
+    ----------
+    ham_filename : str
+        The filename of the HDF5 file containing the total Cholesky (naux, nbas, nbas)
+    nmembers : int
+        The number of members among which the Cholesky vectors will be distributed.
+    verbose : bool
+        If True, print the size and displacement of every split.
+    """
+    with h5py.File(ham_filename, "r") as source_file:
+        # NOTE: loads the whole dataset into memory; a huge file should be read in slices.
+        dataset = np.array(source_file["LXmn"][:])
+        num_chol = dataset.shape[0]
+    split_sizes, displacements = make_splits_displacements(num_chol, nmembers)
+    dataset = dataset.transpose(1, 2, 0).reshape(-1, num_chol)
+
+    for i, (size, displacement) in enumerate(zip(split_sizes, displacements)):
+        row_start = displacement  # despite the name, these bound a column (naux) range
+        row_end = displacement + size
+        with h5py.File(f"chol_{i}.h5", "w") as target_file:
+            target_file.create_dataset("chol", data=dataset[:, row_start:row_end])
+        if verbose:
+            print(f"# Split {i}: Size {size}, Displacement {displacement}")
+
+    if verbose:
+        print("# Splitting complete.")
diff --git a/ipie/utils/from_dice.py b/ipie/utils/from_dice.py
index 433ee501..566bc0ef 100644
--- a/ipie/utils/from_dice.py
+++ b/ipie/utils/from_dice.py
@@ -1,4 +1,5 @@
 """Utilities for runnning dice through the shciscf plugin."""
+
 import glob
 import os
 import struct
diff --git a/ipie/utils/misc.py b/ipie/utils/misc.py
index 4c956d98..b8195f9d 100644
--- a/ipie/utils/misc.py
+++ b/ipie/utils/misc.py
@@ -25,6 +25,7 @@
 import time
 import types
 from functools import reduce
+from typing import Dict
 
 import numpy
 import scipy.sparse
@@ -294,6 +295,37 @@ def get_node_mem():
         return 0.0
 
 
+def get_numpy_blas_info() -> Dict[str, Dict[str, str]]:
+    """Print and return the BLAS library information numpy was built with.
+
+    Returns ``{"BLAS": {...}}`` with the library name and its library directory
+    (plus the version when numpy's dict-style config is available).
+    """
+    info = {}
+    try:
+        config = numpy.show_config(mode="dicts")
+        blas_config = config["Build Dependencies"]["blas"]
+        info["BLAS"] = {
+            "lib": blas_config["name"],
+            "version": blas_config["version"],
+            # Label matches the value: this is the library (not include) directory.
+            "lib directory": blas_config["lib directory"],
+        }
+        for k, v in info["BLAS"].items():
+            print(f"# - BLAS {k}: {v}")
+    except TypeError:
+        # Presumably show_config rejected mode=; use the legacy numpy.__config__ attributes.
+        try:
+            legacy = numpy.__config__.blas_opt_info  # pylint: disable=no-member
+        except AttributeError:
+            legacy = numpy.__config__.blas_ilp64_opt_info  # pylint: disable=no-member
+        np_lib, lib_dir = legacy["libraries"], legacy["library_dirs"]
+        print(f"# - BLAS lib: {' '.join(np_lib):s}")
+        print(f"# - BLAS dir: {' '.join(lib_dir):s}")
+        info["BLAS"] = {"lib": " ".join(np_lib), "path": " ".join(lib_dir)}
+    return info
+
+
 def print_env_info(sha1, branch, local_mods, uuid, nranks):
     import ipie
 
@@ -329,20 +361,7 @@ def print_env_info(sha1, branch, local_mods, uuid, nranks):
             print(f"# Using {lib:s} v{vers:s} from: {path:s}.")
             info[f"{lib:s}"] = {"version": vers, "path": path}
             if lib == "numpy":
-                try:
-                    np_lib = l.__config__.blas_opt_info["libraries"]
-                except AttributeError:
-                    np_lib = l.__config__.blas_ilp64_opt_info["libraries"]
-                print(f"# - BLAS lib: {' '.join(np_lib):s}")
-                try:
-                    lib_dir = l.__config__.blas_opt_info["library_dirs"]
-                except AttributeError:
-                    lib_dir = l.__config__.blas_ilp64_opt_info["library_dirs"]
-                print(f"# - BLAS dir: {' '.join(lib_dir):s}")
-                info[f"{lib:s}"]["BLAS"] = {
-                    "lib": " ".join(np_lib),
-                    "path": " ".join(lib_dir),
-                }
+                info[f"{lib:s}"].update(get_numpy_blas_info())  # keep version/path set above
             elif lib == "mpi4py":
                 mpicc = l.get_config().get("mpicc", "none")
                 print(f"# - mpicc: {mpicc:s}")
diff --git a/ipie/utils/mpi.py b/ipie/utils/mpi.py
index c37d2543..4ce3b806 100644
--- a/ipie/utils/mpi.py
+++ b/ipie/utils/mpi.py
@@ -76,7 +76,7 @@ def scatter_group(self, array, root=0):  # scatter within a group
         elif isinstance(array, np.ndarray):
             if len(array.shape) == 2:
                 ncols = array.shape[1]
-                my_array = np.zeros((split_sizes[self.srank], ncols), dtype=array.dtype)
+                my_array = np.zeros((split_sizes[self.srank], ncols), dtype=np.float64)
                 self.scomm.Scatterv(
                     [array, split_sizes * ncols, displacements * ncols, MPI.DOUBLE],
                     my_array,
diff --git a/ipie/utils/testing.py b/ipie/utils/testing.py
index fc5d5775..c141766e 100644
--- a/ipie/utils/testing.py
+++ b/ipie/utils/testing.py
@@ -48,9 +48,11 @@ def generate_hamiltonian(nmo, nelec, cplx=False, sym=8, tol=1e-3):
     h1e = numpy.random.random((nmo, nmo))
     if cplx:
         h1e = h1e + 1j * numpy.random.random((nmo, nmo))
+
     eri = numpy.random.normal(scale=0.01, size=(nmo, nmo, nmo, nmo))
     if cplx:
         eri = eri + 1j * numpy.random.normal(scale=0.01, size=(nmo, nmo, nmo, nmo))
+
     # Restore symmetry to the integrals.
     if sym >= 4:
         # (ik|jl) = (jl|ik)
@@ -229,7 +231,7 @@ def get_random_phmsd_opt(nup, ndown, nbasis, ndet=10, init=False, dist=None, cmp
             dets += list(itertools.product(oa, ob))
     occ_a, occ_b = zip(*dets)
     _ndet = min(len(occ_a), ndet)
-    wfn = (coeffs, list(occ_a[:_ndet]), list(occ_b[:_ndet]))
+    wfn = (coeffs[:_ndet], list(occ_a[:_ndet]), list(occ_b[:_ndet]))
     return wfn, init_wfn
 
 
diff --git a/ipie/utils/tests/test_io.py b/ipie/utils/tests/test_io.py
index f3285f9f..8c9fc60e 100644
--- a/ipie/utils/tests/test_io.py
+++ b/ipie/utils/tests/test_io.py
@@ -23,6 +23,40 @@
 
 from ipie.utils.io import read_hamiltonian, read_wavefunction, write_hamiltonian, write_wavefunction
 from ipie.utils.testing import get_random_phmsd_opt
+import h5py
+from ipie.utils.chunk_large_chol import split_cholesky
+import tempfile
+
+
+@pytest.mark.unit
+def test_split_cholesky():
+    """Chunk files written by split_cholesky must reassemble into the input array."""
+    naux, nbas = 105, 10
+    with tempfile.NamedTemporaryFile(suffix=".h5", delete=False) as temp_file:
+        temp_hdf5_filename = temp_file.name
+    mock_data = np.random.rand(naux, nbas, nbas)
+    with h5py.File(temp_hdf5_filename, "w") as f:
+        f.create_dataset("LXmn", data=mock_data)
+
+    nmembers = 4
+    split_cholesky(temp_hdf5_filename, nmembers, verbose=False)
+
+    collected_data = []
+    total_elements = 0
+    for i in range(nmembers):
+        with h5py.File(f"chol_{i}.h5", "r") as f:
+            chol_data = f["chol"][()]
+            collected_data.append(chol_data)
+            assert chol_data.ndim == 2
+            assert chol_data.shape[0] == nbas**2
+            total_elements += chol_data.size
+    assert total_elements == mock_data.size
+    collected_data = np.hstack(collected_data)
+    assert np.allclose(mock_data, collected_data.T.reshape(naux, nbas, nbas))
+
+    # Remove the input file too: delete=False means tempfile never cleans it up.
+    for path in [temp_hdf5_filename] + [f"chol_{i}.h5" for i in range(nmembers)]:
+        os.remove(path)
 
 
 @pytest.mark.unit
diff --git a/ipie/walkers/base_walkers.py b/ipie/walkers/base_walkers.py
index 7f675cba..3c1fd141 100644
--- a/ipie/walkers/base_walkers.py
+++ b/ipie/walkers/base_walkers.py
@@ -120,9 +120,9 @@ def __init__(
 
         self.hybrid_energy = numpy.array([0.0 for iw in range(self.nwalkers)])
         self.detR = [1.0 for iw in range(self.nwalkers)]
-        self.detR_shift = numpy.array([0.0 for iw in range(self.nwalkers)])
+        self.detR_shift = xp.array([0.0 for iw in range(self.nwalkers)])
         self.log_detR = [0.0 for iw in range(self.nwalkers)]
-        self.log_shift = numpy.array([0.0 for iw in range(self.nwalkers)])
+        self.log_shift = xp.array([0.0 for iw in range(self.nwalkers)])
         self.log_detR_shift = [0.0 for iw in range(self.nwalkers)]
 
         self.buff_names = [
diff --git a/ipie/walkers/pop_controller.py b/ipie/walkers/pop_controller.py
index dd76b4c4..1cd86b45 100644
--- a/ipie/walkers/pop_controller.py
+++ b/ipie/walkers/pop_controller.py
@@ -89,6 +89,8 @@ def pop_control(self, walkers, comm):
         else:
             sum_weights = numpy.sum(weights)
             total_weight = numpy.empty(1, dtype=numpy.float64)
+            if hasattr(sum_weights, "get"):
+                sum_weights = sum_weights.get()
             comm.Reduce(sum_weights, total_weight, op=MPI.SUM, root=0)
             comm.Bcast(total_weight, root=0)
             total_weight = total_weight[0]
@@ -133,7 +135,7 @@ def get_buffer(walkers, iw):
         Relevant walker information for population control.
     """
     s = 0
-    buff = numpy.zeros(walkers.buff_size, dtype=numpy.complex128)
+    buff = xp.zeros(walkers.buff_size, dtype=numpy.complex128)
     for d in walkers.buff_names:
         data = walkers.__dict__[d]
         if data is None:
@@ -315,17 +317,17 @@ def pair_branch(walkers, comm, max_weight, min_weight, timer=PopControllerTimer(
         glob_inf_1 = numpy.empty([comm.size, walkers.nwalkers], dtype=numpy.int64)
         glob_inf_1.fill(1)
         glob_inf_2 = numpy.array(
-            [[r for i in range(walkers.nwalkers)] for r in range(comm.size)],
-            dtype=numpy.int64,
+            [[r for i in range(walkers.nwalkers)] for r in range(comm.size)], dtype=numpy.int64
         )
         glob_inf_3 = numpy.array(
-            [[r for i in range(walkers.nwalkers)] for r in range(comm.size)],
-            dtype=numpy.int64,
+            [[r for i in range(walkers.nwalkers)] for r in range(comm.size)], dtype=numpy.int64
         )
 
     timer.add_non_communication()
 
     timer.start_time()
+    if hasattr(walker_info_0, "get"):
+        walker_info_0 = walker_info_0.get()
     comm.Gather(
         walker_info_0, glob_inf_0, root=0
     )  # gather |w_i| from all processors (comm.size x nwalkers)
@@ -337,17 +339,13 @@ def pair_branch(walkers, comm, max_weight, min_weight, timer=PopControllerTimer(
         # Rescale weights.
         glob_inf = numpy.zeros((walkers.nwalkers * comm.size, 4), dtype=numpy.float64)
         glob_inf[:, 0] = glob_inf_0.ravel()  # contains walker |w_i|
-        glob_inf[
-            :, 1
-        ] = glob_inf_1.ravel()  # all initialized to 1 when it becomes 2 then it will be "branched"
-        glob_inf[
-            :, 2
-        ] = (
+        glob_inf[:, 1] = (
+            glob_inf_1.ravel()
+        )  # all initialized to 1 when it becomes 2 then it will be "branched"
+        glob_inf[:, 2] = (
             glob_inf_2.ravel()
         )  # contain processor+walker indices (initial) (i.e., where walkers live)
-        glob_inf[
-            :, 3
-        ] = (
+        glob_inf[:, 3] = (
             glob_inf_3.ravel()
         )  # contain processor+walker indices (final) (i.e., where walkers live)
         sort = numpy.argsort(glob_inf[:, 0], kind="mergesort")
diff --git a/ipie/walkers/tests/test_population_control.py b/ipie/walkers/tests/test_population_control.py
index 4dcd6374..1ee26d5f 100644
--- a/ipie/walkers/tests/test_population_control.py
+++ b/ipie/walkers/tests/test_population_control.py
@@ -68,6 +68,10 @@ def test_pair_branch_batch():
             batched_data.walkers.weight[iw],
             legacy_data.walker_handler.walkers[iw].weight,
         )
+        assert numpy.allclose(
+            batched_data.walkers.unscaled_weight[iw],
+            legacy_data.walker_handler.walkers[iw].unscaled_weight,
+        )
 
     assert pytest.approx(batched_data.walkers.weight[0]) == 0.2571750688329709
     assert pytest.approx(batched_data.walkers.weight[1]) == 1.0843219322894988
diff --git a/ipie/walkers/uhf_walkers.py b/ipie/walkers/uhf_walkers.py
index 517b5ca8..5811b184 100644
--- a/ipie/walkers/uhf_walkers.py
+++ b/ipie/walkers/uhf_walkers.py
@@ -54,13 +54,13 @@ def __init__(
         # should completely deprecate these
         self.field_configs = None
 
-        self.phia = numpy.array(
+        self.phia = xp.array(
             [initial_walker[:, : self.nup].copy() for iw in range(self.nwalkers)],
-            dtype=numpy.complex128,
+            dtype=xp.complex128,
         )
-        self.phib = numpy.array(
+        self.phib = xp.array(
             [initial_walker[:, self.nup :].copy() for iw in range(self.nwalkers)],
-            dtype=numpy.complex128,
+            dtype=xp.complex128,
         )
 
         # will be built only on request
@@ -151,7 +151,6 @@ def reortho_batched(self):
             (self.phib, Rdn) = qr(self.phib, mode=qr_mode)
             Rdn_diag = xp.einsum("wii->wi", Rdn)
             log_det += xp.einsum("wi->w", xp.log(abs(Rdn_diag)))
-
         self.detR = xp.exp(log_det - self.detR_shift)
         self.ovlp = self.ovlp / self.detR
 
diff --git a/pytest.ini b/pytest.ini
index 63efce45..5315d9b7 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -5,5 +5,5 @@ markers =
     mpi: mark a test as a MPI intgration test
     wicks: lib file (used to filter out lib until cmake setup)
     gpu: mark a test as a gpu unit test
-addopts = -rs -v -m "unit or driver or mpi" --ignore=ipie/lib
+addopts = -rs -v -m "unit or driver or mpi"
 
diff --git a/requirements.txt b/requirements.txt
index 3d9432a0..41dbc0a9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
 cython >= 0.29.0
 h5py >= 3.0.0
-numpy >= 1.20.0, < 1.26.0
-scipy >= 1.3.0, <=1.10.1
+numpy >= 1.20.0
+scipy >= 1.3.0
 pytest
-pandas == 1.5.1 # issue with pyblock and pandas > 2
+pandas
 numba
 plum-dispatch
diff --git a/setup.py b/setup.py
index c2d75534..0cb2f5f4 100644
--- a/setup.py
+++ b/setup.py
@@ -53,7 +53,6 @@ def version_number(path: str) -> str:
     return version
 
 
-
 def main() -> None:
     version_path = "ipie/_version.py"
     __version__ = version_number(version_path)
@@ -67,7 +66,7 @@ def main() -> None:
         packages=find_packages(exclude=["examples", "docs", "tests", "tools", "setup.py"]),
         license="Apache 2.0",
         description="Python implementations of Imaginary-time Evolution algorithms",
-        python_requires=">=3.7.0,<3.12.0",
+        python_requires=">=3.8.0",
         scripts=[
             "bin/ipie",
             "tools/extract_dice.py",
@@ -84,5 +83,6 @@ def main() -> None:
         long_description=open("README.rst").read(),
     )
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/timing_scripts/afqmc_batch_timing.py b/timing_scripts/afqmc_batch_timing.py
index 0caacc86..bd434817 100644
--- a/timing_scripts/afqmc_batch_timing.py
+++ b/timing_scripts/afqmc_batch_timing.py
@@ -17,7 +17,7 @@
 
 from mpi4py import MPI
 
-nelec = (5,5)
+nelec = (5, 5)
 nwalkers = 10
 nsteps = 10
 
@@ -26,21 +26,10 @@
         "nup": nelec[0],
         "ndown": nelec[1],
     },
-    "hamiltonian": {
-        "name": "Generic",
-        "integrals": "afqmc.h5"
-    },
-    "qmc": {
-        "dt": 0.01,
-        "nsteps": nsteps,
-        "nwalkers": nwalkers,
-        "blocks": 1,
-        "batched": True
-    },
-    "trial": {
-        "filename": "afqmc.h5"
-    },
-    "estimators": {}
+    "hamiltonian": {"name": "Generic", "integrals": "afqmc.h5"},
+    "qmc": {"dt": 0.01, "nsteps": nsteps, "nwalkers": nwalkers, "blocks": 1, "batched": True},
+    "trial": {"filename": "afqmc.h5"},
+    "estimators": {},
 }
 
 numpy.random.seed(7)
@@ -49,33 +38,24 @@
 verbose = True
 shared_comm = get_shared_comm(comm, verbose=verbose)
 
-qmc_opts = get_input_value(options, 'qmc',
-                           default={},
-                           verbose=verbose)
-ham_opts = get_input_value(options, 'hamiltonian',
-                           default={},
-                           verbose=verbose)
-twf_opts = get_input_value(options, 'trial',
-                           default={},
-                           verbose=verbose)
-prop_opts = get_input_value(options, 'propoagator',
-                           default={},
-                           verbose=verbose)
-qmc = QMCOpts(qmc_opts,verbose=True)
-ham = get_hamiltonian (sys, ham_opts, verbose = True, comm=shared_comm)
-
-trial = ( get_trial_wavefunction(sys, ham, options=twf_opts,
-                       comm=comm,
-                       scomm=shared_comm,
-                       verbose=verbose) )
-trial.psi = trial.psi[0] # Super hacky thing to do; this needs to be fixed...
-trial.psia = trial.psia[0] # Super hacky thing to do; this needs to be fixed...
-trial.psib = trial.psib[0] # Super hacky thing to do; this needs to be fixed...
-trial.calculate_energy(sys, ham) # this is to get the energy shift
-prop = get_propagator_driver(sys, ham, trial, qmc, options=prop_opts,verbose=verbose)
+qmc_opts = get_input_value(options, "qmc", default={}, verbose=verbose)
+ham_opts = get_input_value(options, "hamiltonian", default={}, verbose=verbose)
+twf_opts = get_input_value(options, "trial", default={}, verbose=verbose)
+prop_opts = get_input_value(options, "propoagator", default={}, verbose=verbose)
+qmc = QMCOpts(qmc_opts, verbose=True)
+ham = get_hamiltonian(sys, ham_opts, verbose=True, comm=shared_comm)
+
+trial = get_trial_wavefunction(
+    sys, ham, options=twf_opts, comm=comm, scomm=shared_comm, verbose=verbose
+)
+trial.psi = trial.psi[0]  # Super hacky thing to do; this needs to be fixed...
+trial.psia = trial.psia[0]  # Super hacky thing to do; this needs to be fixed...
+trial.psib = trial.psib[0]  # Super hacky thing to do; this needs to be fixed...
+trial.calculate_energy(sys, ham)  # this is to get the energy shift
+prop = get_propagator_driver(sys, ham, trial, qmc, options=prop_opts, verbose=verbose)
 
 walker_batch = SingleDetWalkerBatch(sys, ham, trial, nwalkers)
-for i in range (nsteps):
+for i in range(nsteps):
     prop.propagate_walker_batch(walker_batch, sys, ham, trial, trial.energy)
     walker_batch.reortho()
 
@@ -84,21 +64,10 @@
         "nup": nelec[0],
         "ndown": nelec[1],
     },
-    "hamiltonian": {
-        "name": "Generic",
-        "integrals": "afqmc.h5"
-    },
-    "qmc": {
-        "dt": 0.01,
-        "nsteps": nsteps,
-        "nwalkers": nwalkers,
-        "blocks": 1,
-        "batched": False
-    },
-    "trial": {
-        "filename": "afqmc.h5"
-    },
-    "estimators": {}
+    "hamiltonian": {"name": "Generic", "integrals": "afqmc.h5"},
+    "qmc": {"dt": 0.01, "nsteps": nsteps, "nwalkers": nwalkers, "blocks": 1, "batched": False},
+    "trial": {"filename": "afqmc.h5"},
+    "estimators": {},
 }
 
 numpy.random.seed(7)
@@ -107,29 +76,18 @@
 verbose = True
 shared_comm = get_shared_comm(comm, verbose=verbose)
 
-qmc_opts = get_input_value(options, 'qmc',
-                           default={},
-                           verbose=verbose)
-ham_opts = get_input_value(options, 'hamiltonian',
-                           default={},
-                           verbose=verbose)
-twf_opts = get_input_value(options, 'trial',
-                           default={},
-                           verbose=verbose)
-prop_opts = get_input_value(options, 'propoagator',
-                           default={},
-                           verbose=verbose)
-qmc = QMCOpts(qmc_opts,verbose=True)
-prop = get_propagator_driver_legacy(sys, ham, trial, qmc, options=prop_opts,verbose=verbose)
+qmc_opts = get_input_value(options, "qmc", default={}, verbose=verbose)
+ham_opts = get_input_value(options, "hamiltonian", default={}, verbose=verbose)
+twf_opts = get_input_value(options, "trial", default={}, verbose=verbose)
+prop_opts = get_input_value(options, "propoagator", default={}, verbose=verbose)
+qmc = QMCOpts(qmc_opts, verbose=True)
+prop = get_propagator_driver_legacy(sys, ham, trial, qmc, options=prop_opts, verbose=verbose)
 walkers = [SingleDetWalker(sys, ham, trial) for iw in range(nwalkers)]
-for i in range (nsteps):
+for i in range(nsteps):
     for walker in walkers:
         prop.propagate_walker(walker, sys, ham, trial, trial.energy)
-        detR = walker.reortho(trial) # reorthogonalizing to stablize
+        detR = walker.reortho(trial)  # reorthogonalizing to stablize
 
 for iw in range(nwalkers):
-    assert numpy.allclose(walker_batch.phia[iw], walkers[iw].phi[:,:nelec[0]])
-    assert numpy.allclose(walker_batch.phib[iw], walkers[iw].phi[:,nelec[0]:])
-
-
-
+    assert numpy.allclose(walker_batch.phia[iw], walkers[iw].phi[:, : nelec[0]])
+    assert numpy.allclose(walker_batch.phib[iw], walkers[iw].phi[:, nelec[0] :])
diff --git a/timing_scripts/afqmc_timing.py b/timing_scripts/afqmc_timing.py
index 0cee60a1..03813737 100644
--- a/timing_scripts/afqmc_timing.py
+++ b/timing_scripts/afqmc_timing.py
@@ -17,25 +17,16 @@
 
 from mpi4py import MPI
 
-nelec = (94,92)
+nelec = (94, 92)
 options = {
     "system": {
         "nup": 94,
         "ndown": 92,
     },
-    "hamiltonian": {
-        "name": "Generic",
-        "integrals": "afqmc.h5"
-    },
-    "qmc": {
-        "dt": 0.005,
-        "nwalkers": 1,
-        "blocks": 1
-    },
-    "trial": {
-        "filename": "afqmc.h5"
-    },
-    "estimators": {}
+    "hamiltonian": {"name": "Generic", "integrals": "afqmc.h5"},
+    "qmc": {"dt": 0.005, "nwalkers": 1, "blocks": 1},
+    "trial": {"filename": "afqmc.h5"},
+    "estimators": {},
 }
 numpy.random.seed(7)
 sys = Generic(nelec=nelec)
@@ -43,29 +34,20 @@
 verbose = True
 shared_comm = get_shared_comm(comm, verbose=verbose)
 
-qmc_opts = get_input_value(options, 'qmc',
-                           default={},
-                           verbose=verbose)
-ham_opts = get_input_value(options, 'hamiltonian',
-                           default={},
-                           verbose=verbose)
-twf_opts = get_input_value(options, 'trial',
-                           default={},
-                           verbose=verbose)
-prop_opts = get_input_value(options, 'propoagator',
-                           default={},
-                           verbose=verbose)
-qmc = QMCOpts(qmc_opts, sys,verbose=True)
-ham = get_hamiltonian (sys, ham_opts, verbose = True, comm=shared_comm)
+qmc_opts = get_input_value(options, "qmc", default={}, verbose=verbose)
+ham_opts = get_input_value(options, "hamiltonian", default={}, verbose=verbose)
+twf_opts = get_input_value(options, "trial", default={}, verbose=verbose)
+prop_opts = get_input_value(options, "propoagator", default={}, verbose=verbose)
+qmc = QMCOpts(qmc_opts, sys, verbose=True)
+ham = get_hamiltonian(sys, ham_opts, verbose=True, comm=shared_comm)
 
-trial = ( get_trial_wavefunction(sys, ham, options=twf_opts,
-                       comm=comm,
-                       scomm=shared_comm,
-                       verbose=verbose) )
-trial.psi = trial.psi[0] # Super hacky thing to do; this needs to be fixed...
-trial.calculate_energy(sys, ham) # this is to get the energy shift
+trial = get_trial_wavefunction(
+    sys, ham, options=twf_opts, comm=comm, scomm=shared_comm, verbose=verbose
+)
+trial.psi = trial.psi[0]  # Super hacky thing to do; this needs to be fixed...
+trial.calculate_energy(sys, ham)  # this is to get the energy shift
 
-prop = get_propagator_driver(sys, ham, trial, qmc, options=prop_opts,verbose=verbose)
+prop = get_propagator_driver(sys, ham, trial, qmc, options=prop_opts, verbose=verbose)
 
 walker = SingleDetWalker(sys, ham, trial)
 
@@ -74,17 +56,15 @@
 pr = cProfile.Profile()
 pr.enable()
 
-for i in range (nsteps):
+for i in range(nsteps):
     prop.propagate_walker(walker, sys, ham, trial, trial.energy)
-    detR = walker.reortho(trial) # reorthogonalizing to stablize
+    detR = walker.reortho(trial)  # reorthogonalizing to stabilize
 
 pr.disable()
-pr.print_stats(sort='cumtime')
+pr.print_stats(sort="cumtime")
 
-walker.greens_function(trial) # Green's function gets updated
+walker.greens_function(trial)  # Green's function gets updated
 etot = local_energy(sys, ham, walker, trial)[0]
 
 print("a sample of local_energy = {}".format(etot))
 # a sample of local_energy = (-2244.6424862764557+0.00047855044660360946j)
-
-
diff --git a/timing_scripts/chunking_VHS.py b/timing_scripts/chunking_VHS.py
index abe6f743..40ef9582 100644
--- a/timing_scripts/chunking_VHS.py
+++ b/timing_scripts/chunking_VHS.py
@@ -10,14 +10,14 @@
 size = comm.Get_size()
 rank = comm.Get_rank()
 
-handler = MPIHandler(comm,options={"nmembers":3}, verbose=(rank==0))
+handler = MPIHandler(comm, options={"nmembers": 3}, verbose=(rank == 0))
 
 nwalkers = 50
 n = 10
-nchol = n*4
-shape = (nchol,n*n)
+nchol = n * 4
+shape = (nchol, n * n)
 
-if (handler.srank == 0):
+if handler.srank == 0:
     chol = numpy.random.random(shape)
 else:
     chol = None
@@ -25,15 +25,15 @@
 chol_chunk = handler.scatter_group(chol)
 chol_chunk = chol_chunk.T.copy()
 
-xshifted = numpy.random.random((nchol,nwalkers))
+xshifted = numpy.random.random((nchol, nwalkers))
 
 chol_idxs = [i for i in range(nchol)]
 chol_idxs_chunk = handler.scatter_group(chol_idxs)
 
 senders = numpy.array([i for i in range(handler.scomm.size)])
-receivers = numpy.array([i for i in range(1,handler.scomm.size)] + [0])
+receivers = numpy.array([i for i in range(1, handler.scomm.size)] + [0])
 
-VHS_send = chol_chunk.dot(xshifted[chol_idxs_chunk,:])
+VHS_send = chol_chunk.dot(xshifted[chol_idxs_chunk, :])
 VHS_recv = numpy.zeros_like(VHS_send)
 
 xshifted_send = xshifted.copy()
@@ -42,29 +42,29 @@
 ssize = handler.scomm.size
 srank = handler.scomm.rank
 
-for icycle in range(handler.ssize-1):
+for icycle in range(handler.ssize - 1):
     for isend, sender in enumerate(senders):
         if srank == isend:
-            handler.scomm.Send(xshifted_send,dest=receivers[isend], tag=1)
-            handler.scomm.Send(VHS_send,dest=receivers[isend], tag=2)
+            handler.scomm.Send(xshifted_send, dest=receivers[isend], tag=1)
+            handler.scomm.Send(VHS_send, dest=receivers[isend], tag=2)
         elif srank == receivers[isend]:
             sender = numpy.where(receivers == srank)[0]
-            handler.scomm.Recv(xshifted_recv,source=sender, tag=1)
-            handler.scomm.Recv(VHS_recv,source=sender, tag=2)
+            handler.scomm.Recv(xshifted_recv, source=sender, tag=1)
+            handler.scomm.Recv(VHS_recv, source=sender, tag=2)
     handler.scomm.barrier()
     # prepare sending
-    VHS_send = VHS_recv + chol_chunk.dot(xshifted_recv[chol_idxs_chunk,:])
+    VHS_send = VHS_recv + chol_chunk.dot(xshifted_recv[chol_idxs_chunk, :])
     xshifted_send = xshifted_recv.copy()
 
 for isend, sender in enumerate(senders):
-    if (handler.scomm.rank == sender): # sending 1 xshifted to 0 xshifted_buf
-        handler.scomm.Send(VHS_send,dest=receivers[isend], tag=1)
+    if handler.scomm.rank == sender:  # sending 1 xshifted to 0 xshifted_buf
+        handler.scomm.Send(VHS_send, dest=receivers[isend], tag=1)
     elif srank == receivers[isend]:
         sender = numpy.where(receivers == srank)[0]
-        handler.scomm.Recv(VHS_recv,source=sender, tag=1)
+        handler.scomm.Recv(VHS_recv, source=sender, tag=1)
 
 VHS = VHS_recv.copy()
 
 # if (handler.srank == 0):
 VHS_ref = chol.T.dot(xshifted)
-assert(numpy.allclose(VHS, VHS_ref))
\ No newline at end of file
+assert numpy.allclose(VHS, VHS_ref)
diff --git a/timing_scripts/chunking_force_bias.py b/timing_scripts/chunking_force_bias.py
index 28e76efc..e9ccb1d6 100644
--- a/timing_scripts/chunking_force_bias.py
+++ b/timing_scripts/chunking_force_bias.py
@@ -10,7 +10,7 @@
 size = comm.Get_size()
 rank = comm.Get_rank()
 
-handler = MPIHandler(comm,options={"nmembers":3}, verbose=(rank==0))
+handler = MPIHandler(comm, options={"nmembers": 3}, verbose=(rank == 0))
 
 ssize = handler.scomm.size
 srank = handler.scomm.rank
@@ -18,13 +18,13 @@
 nwalkers = 40
 nbsf = 100
 nchol = nbsf
-shape = (nchol,nbsf*nbsf)
+shape = (nchol, nbsf * nbsf)
 
 na = 18
 nb = 17
 
-rchola = numpy.random.random((nchol,na*nbsf))
-rcholb = numpy.random.random((nchol,nb*nbsf))
+rchola = numpy.random.random((nchol, na * nbsf))
+rcholb = numpy.random.random((nchol, nb * nbsf))
 
 rchola = comm.bcast(rchola)
 rcholb = comm.bcast(rcholb)
@@ -33,8 +33,12 @@
 rcholb_chunk = handler.scatter_group(rcholb)
 
 # distinct GF for each processor
-Ghalfa = numpy.random.random((nwalkers, na*nbsf)) + 1.j * numpy.random.random((nwalkers, na*nbsf))
-Ghalfb = numpy.random.random((nwalkers, nb*nbsf)) + 1.j * numpy.random.random((nwalkers, nb*nbsf))
+Ghalfa = numpy.random.random((nwalkers, na * nbsf)) + 1.0j * numpy.random.random(
+    (nwalkers, na * nbsf)
+)
+Ghalfb = numpy.random.random((nwalkers, nb * nbsf)) + 1.0j * numpy.random.random(
+    (nwalkers, nb * nbsf)
+)
 
 chol_idxs = [i for i in range(nchol)]
 chol_idxs_chunk = handler.scatter_group(chol_idxs)
@@ -45,54 +49,62 @@
 Ghalfa_recv = numpy.zeros_like(Ghalfa)
 Ghalfb_recv = numpy.zeros_like(Ghalfb)
 
-vbias_batch_real_recv = numpy.zeros((nchol,nwalkers))
-vbias_batch_imag_recv = numpy.zeros((nchol,nwalkers))
+vbias_batch_real_recv = numpy.zeros((nchol, nwalkers))
+vbias_batch_imag_recv = numpy.zeros((nchol, nwalkers))
 
-vbias_batch_real_send = numpy.zeros((nchol,nwalkers))
-vbias_batch_imag_send = numpy.zeros((nchol,nwalkers))
+vbias_batch_real_send = numpy.zeros((nchol, nwalkers))
+vbias_batch_imag_send = numpy.zeros((nchol, nwalkers))
 
-vbias_batch_real_send[chol_idxs_chunk,:] = rchola_chunk.dot(Ghalfa.T.real) + rcholb_chunk.dot(Ghalfb.T.real)
-vbias_batch_imag_send[chol_idxs_chunk,:] = rchola_chunk.dot(Ghalfa.T.imag) + rcholb_chunk.dot(Ghalfb.T.imag)
+vbias_batch_real_send[chol_idxs_chunk, :] = rchola_chunk.dot(Ghalfa.T.real) + rcholb_chunk.dot(
+    Ghalfb.T.real
+)
+vbias_batch_imag_send[chol_idxs_chunk, :] = rchola_chunk.dot(Ghalfa.T.imag) + rcholb_chunk.dot(
+    Ghalfb.T.imag
+)
 
 senders = handler.senders
 receivers = handler.receivers
 
-for icycle in range(handler.ssize-1):
+for icycle in range(handler.ssize - 1):
     for isend, sender in enumerate(senders):
         if srank == isend:
-            handler.scomm.Send(Ghalfa_send,dest=receivers[isend], tag=1)
-            handler.scomm.Send(Ghalfb_send,dest=receivers[isend], tag=2)
-            handler.scomm.Send(vbias_batch_real_send,dest=receivers[isend], tag=3)
-            handler.scomm.Send(vbias_batch_imag_send,dest=receivers[isend], tag=4)
+            handler.scomm.Send(Ghalfa_send, dest=receivers[isend], tag=1)
+            handler.scomm.Send(Ghalfb_send, dest=receivers[isend], tag=2)
+            handler.scomm.Send(vbias_batch_real_send, dest=receivers[isend], tag=3)
+            handler.scomm.Send(vbias_batch_imag_send, dest=receivers[isend], tag=4)
         elif srank == receivers[isend]:
             sender = numpy.where(receivers == srank)[0]
-            handler.scomm.Recv(Ghalfa_recv,source=sender, tag=1)
-            handler.scomm.Recv(Ghalfb_recv,source=sender, tag=2)
-            handler.scomm.Recv(vbias_batch_real_recv,source=sender, tag=3)
-            handler.scomm.Recv(vbias_batch_imag_recv,source=sender, tag=4)
+            handler.scomm.Recv(Ghalfa_recv, source=sender, tag=1)
+            handler.scomm.Recv(Ghalfb_recv, source=sender, tag=2)
+            handler.scomm.Recv(vbias_batch_real_recv, source=sender, tag=3)
+            handler.scomm.Recv(vbias_batch_imag_recv, source=sender, tag=4)
     handler.scomm.barrier()
 
     # prepare sending
     vbias_batch_real_send = vbias_batch_real_recv.copy()
     vbias_batch_imag_send = vbias_batch_imag_recv.copy()
-    vbias_batch_real_send[chol_idxs_chunk,:] = rchola_chunk.dot(Ghalfa_recv.T.real) + rcholb_chunk.dot(Ghalfb_recv.T.real)
-    vbias_batch_imag_send[chol_idxs_chunk,:] = rchola_chunk.dot(Ghalfa_recv.T.imag) + rcholb_chunk.dot(Ghalfb_recv.T.imag)
+    vbias_batch_real_send[chol_idxs_chunk, :] = rchola_chunk.dot(
+        Ghalfa_recv.T.real
+    ) + rcholb_chunk.dot(Ghalfb_recv.T.real)
+    vbias_batch_imag_send[chol_idxs_chunk, :] = rchola_chunk.dot(
+        Ghalfa_recv.T.imag
+    ) + rcholb_chunk.dot(Ghalfb_recv.T.imag)
     Ghalfa_send = Ghalfa_recv.copy()
     Ghalfb_send = Ghalfb_recv.copy()
 
-if (len(senders)>1):
+if len(senders) > 1:
     for isend, sender in enumerate(senders):
-        if (handler.scomm.rank == sender): # sending 1 xshifted to 0 xshifted_buf
-            handler.scomm.Send(vbias_batch_real_send,dest=receivers[isend], tag=1)
-            handler.scomm.Send(vbias_batch_imag_send,dest=receivers[isend], tag=2)
+        if handler.scomm.rank == sender:  # sending 1 xshifted to 0 xshifted_buf
+            handler.scomm.Send(vbias_batch_real_send, dest=receivers[isend], tag=1)
+            handler.scomm.Send(vbias_batch_imag_send, dest=receivers[isend], tag=2)
         elif srank == receivers[isend]:
             sender = numpy.where(receivers == srank)[0]
-            handler.scomm.Recv(vbias_batch_real_recv,source=sender, tag=1)
-            handler.scomm.Recv(vbias_batch_imag_recv,source=sender, tag=2)
+            handler.scomm.Recv(vbias_batch_real_recv, source=sender, tag=1)
+            handler.scomm.Recv(vbias_batch_imag_recv, source=sender, tag=2)
 
 # vbias_batch_real = rchola.dot(Ghalfa.T.real) + rcholb.dot(Ghalfb.T.real)
 # vbias_batch_imag = rchola.dot(Ghalfa.T.imag) + rcholb.dot(Ghalfb.T.imag)
-vbias_batch = vbias_batch_real_recv + 1.j * vbias_batch_imag_recv
+vbias_batch = vbias_batch_real_recv + 1.0j * vbias_batch_imag_recv
 
 # vbias_batch = numpy.empty((walker_batch.nwalkers, hamiltonian.nchol), dtype=Ghalfa.dtype)
 # vbias_batch.real = vbias_batch_real.T.copy()
@@ -109,12 +121,4 @@
 #     tmp = rchola_chunk.dot(Ghalfa.T.real)# + rcholb_chunk.dot(Ghalfb.T.real)
 #     print("vbias_batch_real_send.real in {} = {}".format(srank, tmp))
 
-assert(numpy.allclose(vbias_batch, vbias_batch_ref))
-
-
-
-
-
-
-
-
+assert numpy.allclose(vbias_batch, vbias_batch_ref)
diff --git a/timing_scripts/chunking_local_energy.py b/timing_scripts/chunking_local_energy.py
index 18653a54..b7d8b4ef 100644
--- a/timing_scripts/chunking_local_energy.py
+++ b/timing_scripts/chunking_local_energy.py
@@ -4,15 +4,17 @@
 import numpy
 from mpi4py import MPI
 
-from ipie.estimators.local_energy_sd import (ecoul_kernel_batch_real_rchol_uhf,
-                                             exx_kernel_batch_real_rchol)
+from ipie.estimators.local_energy_sd import (
+    ecoul_kernel_batch_real_rchol_uhf,
+    exx_kernel_batch_real_rchol,
+)
 from ipie.utils.mpi import MPIHandler, get_shared_array, get_shared_comm
 
 comm = MPI.COMM_WORLD
 size = comm.Get_size()
 rank = comm.Get_rank()
 
-handler = MPIHandler(comm,options={"nmembers":3}, verbose=(rank==0))
+handler = MPIHandler(comm, options={"nmembers": 3}, verbose=(rank == 0))
 
 ssize = handler.scomm.size
 srank = handler.scomm.rank
@@ -20,13 +22,13 @@
 nwalkers = 40
 nbasis = 100
 nchol = nbasis
-shape = (nchol,nbasis*nbasis)
+shape = (nchol, nbasis * nbasis)
 
 nalpha = 18
 nbeta = 17
 
-rchola = numpy.random.random((nchol,nalpha*nbasis))
-rcholb = numpy.random.random((nchol,nbeta*nbasis))
+rchola = numpy.random.random((nchol, nalpha * nbasis))
+rcholb = numpy.random.random((nchol, nbeta * nbasis))
 
 rchola = comm.bcast(rchola)
 rcholb = comm.bcast(rcholb)
@@ -35,8 +37,12 @@
 rcholb_chunk = handler.scatter_group(rcholb)
 
 # distinct GF for each processor
-Ghalfa = numpy.random.random((nwalkers, nalpha*nbasis)) + 1.j * numpy.random.random((nwalkers, nalpha*nbasis))
-Ghalfb = numpy.random.random((nwalkers, nbeta*nbasis)) + 1.j * numpy.random.random((nwalkers, nbeta*nbasis))
+Ghalfa = numpy.random.random((nwalkers, nalpha * nbasis)) + 1.0j * numpy.random.random(
+    (nwalkers, nalpha * nbasis)
+)
+Ghalfb = numpy.random.random((nwalkers, nbeta * nbasis)) + 1.0j * numpy.random.random(
+    (nwalkers, nbeta * nbasis)
+)
 
 chol_idxs = [i for i in range(nchol)]
 chol_idxs_chunk = handler.scatter_group(chol_idxs)
@@ -50,8 +56,8 @@
 senders = handler.senders
 receivers = handler.receivers
 
-Ghalfa = Ghalfa.reshape(nwalkers, nalpha*nbasis)
-Ghalfb = Ghalfb.reshape(nwalkers, nbeta*nbasis)
+Ghalfa = Ghalfa.reshape(nwalkers, nalpha * nbasis)
+Ghalfb = Ghalfb.reshape(nwalkers, nbeta * nbasis)
 ecoul_send = ecoul_kernel_batch_real_rchol_uhf(rchola_chunk, rcholb_chunk, Ghalfa, Ghalfb)
 Ghalfa = Ghalfa.reshape(nwalkers, nalpha, nbasis)
 Ghalfb = Ghalfb.reshape(nwalkers, nbeta, nbasis)
@@ -61,26 +67,28 @@
 exx_recv = exx_send.copy()
 ecoul_recv = ecoul_send.copy()
 
-for icycle in range(handler.ssize-1):
+for icycle in range(handler.ssize - 1):
     for isend, sender in enumerate(senders):
         if srank == isend:
-            handler.scomm.Send(Ghalfa_send,dest=receivers[isend], tag=1)
-            handler.scomm.Send(Ghalfb_send,dest=receivers[isend], tag=2)
-            handler.scomm.Send(ecoul_send,dest=receivers[isend], tag=3)
-            handler.scomm.Send(exx_send,dest=receivers[isend], tag=4)
+            handler.scomm.Send(Ghalfa_send, dest=receivers[isend], tag=1)
+            handler.scomm.Send(Ghalfb_send, dest=receivers[isend], tag=2)
+            handler.scomm.Send(ecoul_send, dest=receivers[isend], tag=3)
+            handler.scomm.Send(exx_send, dest=receivers[isend], tag=4)
         elif srank == receivers[isend]:
             sender = numpy.where(receivers == srank)[0]
-            handler.scomm.Recv(Ghalfa_recv,source=sender, tag=1)
-            handler.scomm.Recv(Ghalfb_recv,source=sender, tag=2)
-            handler.scomm.Recv(ecoul_recv,source=sender, tag=3)
-            handler.scomm.Recv(exx_recv,source=sender, tag=4)
+            handler.scomm.Recv(Ghalfa_recv, source=sender, tag=1)
+            handler.scomm.Recv(Ghalfb_recv, source=sender, tag=2)
+            handler.scomm.Recv(ecoul_recv, source=sender, tag=3)
+            handler.scomm.Recv(exx_recv, source=sender, tag=4)
     handler.scomm.barrier()
 
     # prepare sending
     ecoul_send = ecoul_recv.copy()
-    Ghalfa_recv = Ghalfa_recv.reshape(nwalkers, nalpha*nbasis)
-    Ghalfb_recv = Ghalfb_recv.reshape(nwalkers, nbeta*nbasis)
-    ecoul_send += ecoul_kernel_batch_real_rchol_uhf(rchola_chunk, rcholb_chunk, Ghalfa_recv, Ghalfb_recv)
+    Ghalfa_recv = Ghalfa_recv.reshape(nwalkers, nalpha * nbasis)
+    Ghalfb_recv = Ghalfb_recv.reshape(nwalkers, nbeta * nbasis)
+    ecoul_send += ecoul_kernel_batch_real_rchol_uhf(
+        rchola_chunk, rcholb_chunk, Ghalfa_recv, Ghalfb_recv
+    )
     Ghalfa_recv = Ghalfa_recv.reshape(nwalkers, nalpha, nbasis)
     Ghalfb_recv = Ghalfb_recv.reshape(nwalkers, nbeta, nbasis)
     exx_send = exx_recv.copy()
@@ -89,22 +97,21 @@
     Ghalfa_send = Ghalfa_recv.copy()
     Ghalfb_send = Ghalfb_recv.copy()
 
-if (len(senders)>1):
+if len(senders) > 1:
     for isend, sender in enumerate(senders):
-        if (handler.scomm.rank == sender): # sending 1 xshifted to 0 xshifted_buf
-            handler.scomm.Send(ecoul_send,dest=receivers[isend], tag=1)
-            handler.scomm.Send(exx_send,dest=receivers[isend], tag=2)
+        if handler.scomm.rank == sender:  # sending 1 xshifted to 0 xshifted_buf
+            handler.scomm.Send(ecoul_send, dest=receivers[isend], tag=1)
+            handler.scomm.Send(exx_send, dest=receivers[isend], tag=2)
         elif srank == receivers[isend]:
             sender = numpy.where(receivers == srank)[0]
-            handler.scomm.Recv(ecoul_recv,source=sender, tag=1)
-            handler.scomm.Recv(exx_recv,source=sender, tag=2)
+            handler.scomm.Recv(ecoul_recv, source=sender, tag=1)
+            handler.scomm.Recv(exx_recv, source=sender, tag=2)
 
-Ghalfa = Ghalfa.reshape(nwalkers, nalpha*nbasis)
-Ghalfb = Ghalfb.reshape(nwalkers, nbeta*nbasis)
+Ghalfa = Ghalfa.reshape(nwalkers, nalpha * nbasis)
+Ghalfb = Ghalfb.reshape(nwalkers, nbeta * nbasis)
 ecoul_ref = ecoul_kernel_batch_real_rchol_uhf(rchola, rcholb, Ghalfa, Ghalfb)
 Ghalfa = Ghalfa.reshape(nwalkers, nalpha, nbasis)
 Ghalfb = Ghalfb.reshape(nwalkers, nbeta, nbasis)
 exx_ref = exx_kernel_batch_real_rchol(rchola, Ghalfa) + exx_kernel_batch_real_rchol(rcholb, Ghalfb)
-assert(numpy.allclose(ecoul_ref,ecoul_recv))
-assert(numpy.allclose(exx_ref,exx_recv))
-
+assert numpy.allclose(ecoul_ref, ecoul_recv)
+assert numpy.allclose(exx_ref, exx_recv)
diff --git a/timing_scripts/local_energy.py b/timing_scripts/local_energy.py
index f07cb473..45914eb5 100644
--- a/timing_scripts/local_energy.py
+++ b/timing_scripts/local_energy.py
@@ -1,6 +1,6 @@
 import os
 
-os.environ['MKL_NUM_THREADS'] = '1'
+os.environ["MKL_NUM_THREADS"] = "1"
 import timeit
 
 import jax
@@ -34,17 +34,17 @@ def jax_compute(rchol_a, rchol_b, GaT, GbT):
 
 
 def load_func(as_jax=False):
-    #GaT = np.load("GaT.npy")
-    #GbT = np.load("GbT.npy")
-    #rchol_a = np.load("rchol_a.npy")
-    #rchol_b = np.load("rchol_b.npy")
+    # GaT = np.load("GaT.npy")
+    # GbT = np.load("GbT.npy")
+    # rchol_a = np.load("rchol_a.npy")
+    # rchol_b = np.load("rchol_b.npy")
     nao = 570
     nocc = 120
     naux = 3000
-    GaT = np.zeros((nao, nocc), dtype = np.complex128)
-    GbT = np.zeros((nao, nocc), dtype = np.complex128)
-    rchol_a = np.zeros((naux, nocc, nao), dtype = np.float64)
-    rchol_b = np.zeros((naux, nocc, nao), dtype = np.float64)
+    GaT = np.zeros((nao, nocc), dtype=np.complex128)
+    GbT = np.zeros((nao, nocc), dtype=np.complex128)
+    rchol_a = np.zeros((naux, nocc, nao), dtype=np.float64)
+    rchol_b = np.zeros((naux, nocc, nao), dtype=np.float64)
     if as_jax:
         return jnp.array(rchol_a), jnp.array(rchol_b), jnp.array(GaT), jnp.array(GbT)
     return rchol_a, rchol_b, GaT, GbT
@@ -56,33 +56,39 @@ def main():
     nao = 570
     nocc = 120
     naux = 3000
-    GaT = np.zeros((nao, nocc), dtype = np.complex128)
-    GbT = np.zeros((nao, nocc), dtype = np.complex128)
-    rchol_a = np.zeros((naux, nocc, nao), dtype = np.float64)
-    rchol_b = np.zeros((naux, nocc, nao), dtype = np.float64)
+    GaT = np.zeros((nao, nocc), dtype=np.complex128)
+    GbT = np.zeros((nao, nocc), dtype=np.complex128)
+    rchol_a = np.zeros((naux, nocc, nao), dtype=np.float64)
+    rchol_b = np.zeros((naux, nocc, nao), dtype=np.float64)
     # GaT = np.load("GaT.npy")
     # GbT = np.load("GbT.npy")
     # rchol_a = np.load("rchol_a.npy")
     # rchol_b = np.load("rchol_b.npy")
 
-
     vmap_jax_cc = jax.vmap(jax_chol, in_axes=[0, None])
     exx_a = np.sum(vmap_jax_cc(jnp.array(rchol_a), jnp.array(GaT)))
     exx_b = np.sum(vmap_jax_cc(jnp.array(rchol_b), jnp.array(GbT)))
     exx = exx_a + exx_b
-    val = jax_compute(jnp.array(rchol_a), jnp.array(rchol_b), jnp.array(GaT),
-                      jnp.array(GbT)).block_until_ready()
+    val = jax_compute(
+        jnp.array(rchol_a), jnp.array(rchol_b), jnp.array(GaT), jnp.array(GbT)
+    ).block_until_ready()
     print(val, exx)
-    t = timeit.timeit(stmt='jax_compute(rchol_a, rchol_b, GaT, GbT).block_until_ready()',
-                      setup='rchol_a, rchol_b, GaT, GbT = load_func(as_jax=True)',
-                      globals=globals(), number=1)
+    t = timeit.timeit(
+        stmt="jax_compute(rchol_a, rchol_b, GaT, GbT).block_until_ready()",
+        setup="rchol_a, rchol_b, GaT, GbT = load_func(as_jax=True)",
+        globals=globals(),
+        number=1,
+    )
     print(t)
 
-    t = timeit.timeit(stmt='numpy_compute(rchol_a, rchol_b, GaT, GbT)',
-                      setup='rchol_a, rchol_b, GaT, GbT = load_func()',
-                      globals=globals(), number=1)
+    t = timeit.timeit(
+        stmt="numpy_compute(rchol_a, rchol_b, GaT, GbT)",
+        setup="rchol_a, rchol_b, GaT, GbT = load_func()",
+        globals=globals(),
+        number=1,
+    )
     print(t)
 
+
 if __name__ == "__main__":
     main()
-
diff --git a/timing_scripts/local_energy_CPU.py b/timing_scripts/local_energy_CPU.py
index fa6af7de..e480c3db 100644
--- a/timing_scripts/local_energy_CPU.py
+++ b/timing_scripts/local_energy_CPU.py
@@ -1,6 +1,6 @@
 import os
 
-os.environ['MKL_NUM_THREADS'] = '1'
+os.environ["MKL_NUM_THREADS"] = "1"
 import time
 import timeit
 
@@ -13,33 +13,34 @@ def local_energy_generic_cholesky_opt(Ghalfa, Ghalfb, rchola, rcholb):
     nbeta = Ghalfb.shape[0]
     nbasis = Ghalfa.shape[-1]
 
-    Xa = rchola.dot(Ghalfa.real.ravel()) + 1.j * rchola.dot(Ghalfa.imag.ravel())
-    Xb = rcholb.dot(Ghalfb.real.ravel()) + 1.j * rcholb.dot(Ghalfb.imag.ravel())
+    Xa = rchola.dot(Ghalfa.real.ravel()) + 1.0j * rchola.dot(Ghalfa.imag.ravel())
+    Xb = rcholb.dot(Ghalfb.real.ravel()) + 1.0j * rcholb.dot(Ghalfb.imag.ravel())
 
-    ecoul = numpy.dot(Xa,Xa)
-    ecoul += numpy.dot(Xb,Xb)
-    ecoul += 2*numpy.dot(Xa,Xb)
+    ecoul = numpy.dot(Xa, Xa)
+    ecoul += numpy.dot(Xb, Xb)
+    ecoul += 2 * numpy.dot(Xa, Xb)
 
     GhalfaT = Ghalfa.T.copy()
-    GhalfbT = Ghalfb.T.copy() # nbasis x nocc
-    
-    Ta = numpy.zeros((nalpha,nalpha), dtype=numpy.complex128)
-    Tb = numpy.zeros((nbeta,nbeta), dtype=numpy.complex128)
+    GhalfbT = Ghalfb.T.copy()  # nbasis x nocc
 
-    exx  = 0.j  # we will iterate over cholesky index to update Ex energy for alpha and beta
+    Ta = numpy.zeros((nalpha, nalpha), dtype=numpy.complex128)
+    Tb = numpy.zeros((nbeta, nbeta), dtype=numpy.complex128)
+
+    exx = 0.0j  # we will iterate over cholesky index to update Ex energy for alpha and beta
     for x in range(naux):  # write a cython function that calls blas for this.
-        rmi_a = rchola[x].reshape((nalpha,nbasis))
-        rmi_b = rcholb[x].reshape((nbeta,nbasis))
-        Ta[:,:].real = rmi_a.dot(GhalfaT.real) 
-        Ta[:,:].imag = rmi_a.dot(GhalfaT.imag)  # this is a (nalpha, nalpha)
-        Tb[:,:].real = rmi_b.dot(GhalfbT.real) 
-        Tb[:,:].imag = rmi_b.dot(GhalfbT.imag) # this is (nbeta, nbeta)
+        rmi_a = rchola[x].reshape((nalpha, nbasis))
+        rmi_b = rcholb[x].reshape((nbeta, nbasis))
+        Ta[:, :].real = rmi_a.dot(GhalfaT.real)
+        Ta[:, :].imag = rmi_a.dot(GhalfaT.imag)  # this is a (nalpha, nalpha)
+        Tb[:, :].real = rmi_b.dot(GhalfbT.real)
+        Tb[:, :].imag = rmi_b.dot(GhalfbT.imag)  # this is (nbeta, nbeta)
         exx += numpy.trace(Ta.dot(Ta)) + numpy.trace(Tb.dot(Tb))
 
     e2b = 0.5 * (ecoul - exx)
 
     return e2b
 
+
 def local_energy_generic_batch(Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
     # Element wise multiplication.
     nwalkers = Ghalfa_batch.shape[0]
@@ -47,63 +48,70 @@ def local_energy_generic_batch(Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
     nbeta = Ghalfb_batch.shape[1]
     nbasis = Ghalfa_batch.shape[-1]
 
-    Ghalfa_batch = Ghalfa_batch.reshape(nwalkers, nalpha*nbasis)
-    Ghalfb_batch = Ghalfb_batch.reshape(nwalkers, nbeta*nbasis)
+    Ghalfa_batch = Ghalfa_batch.reshape(nwalkers, nalpha * nbasis)
+    Ghalfb_batch = Ghalfb_batch.reshape(nwalkers, nbeta * nbasis)
 
-    Xa = rchola.dot(Ghalfa_batch.real.T) + 1.j * rchola.dot(Ghalfa_batch.imag.T) # naux x nwalkers
-    Xb = rcholb.dot(Ghalfb_batch.real.T) + 1.j * rcholb.dot(Ghalfb_batch.imag.T) # naux x nwalkers
+    Xa = rchola.dot(Ghalfa_batch.real.T) + 1.0j * rchola.dot(Ghalfa_batch.imag.T)  # naux x nwalkers
+    Xb = rcholb.dot(Ghalfb_batch.real.T) + 1.0j * rcholb.dot(Ghalfb_batch.imag.T)  # naux x nwalkers
 
     ecoul = numpy.einsum("xw,xw->w", Xa, Xa, optimize=True)
     ecoul += numpy.einsum("xw,xw->w", Xb, Xb, optimize=True)
-    ecoul += 2. * numpy.einsum("xw,xw->w", Xa, Xb, optimize=True)
+    ecoul += 2.0 * numpy.einsum("xw,xw->w", Xa, Xb, optimize=True)
 
     Ghalfa_batch = Ghalfa_batch.reshape(nwalkers, nalpha, nbasis)
     Ghalfb_batch = Ghalfb_batch.reshape(nwalkers, nbeta, nbasis)
 
-    GhalfaT_batch = Ghalfa_batch.transpose(0,2,1).copy() # nw x nbasis x nocc
-    GhalfbT_batch = Ghalfb_batch.transpose(0,2,1).copy() # nw x nbasis x nocc
-    
-    Ta = numpy.zeros((nwalkers, nalpha,nalpha), dtype=numpy.complex128)
-    Tb = numpy.zeros((nwalkers, nbeta,nbeta), dtype=numpy.complex128)
+    GhalfaT_batch = Ghalfa_batch.transpose(0, 2, 1).copy()  # nw x nbasis x nocc
+    GhalfbT_batch = Ghalfb_batch.transpose(0, 2, 1).copy()  # nw x nbasis x nocc
 
-    exx  = numpy.zeros(nwalkers, dtype=numpy.complex128)  # we will iterate over cholesky index to update Ex energy for alpha and beta
+    Ta = numpy.zeros((nwalkers, nalpha, nalpha), dtype=numpy.complex128)
+    Tb = numpy.zeros((nwalkers, nbeta, nbeta), dtype=numpy.complex128)
+
+    exx = numpy.zeros(
+        nwalkers, dtype=numpy.complex128
+    )  # we will iterate over cholesky index to update Ex energy for alpha and beta
     for x in range(naux):  # write a cython function that calls blas for this.
-        rmi_a = rchola[x].reshape((nalpha,nbasis))
-        rmi_b = rcholb[x].reshape((nbeta,nbasis))
-        Ta[:,:,:].real = rmi_a.dot(GhalfaT_batch.real).transpose(1,0,2)
-        Ta[:,:,:].imag = rmi_a.dot(GhalfaT_batch.imag).transpose(1,0,2)
-        Tb[:,:,:].real = rmi_b.dot(GhalfbT_batch.real).transpose(1,0,2)
-        Tb[:,:,:].imag = rmi_b.dot(GhalfbT_batch.imag).transpose(1,0,2)
+        rmi_a = rchola[x].reshape((nalpha, nbasis))
+        rmi_b = rcholb[x].reshape((nbeta, nbasis))
+        Ta[:, :, :].real = rmi_a.dot(GhalfaT_batch.real).transpose(1, 0, 2)
+        Ta[:, :, :].imag = rmi_a.dot(GhalfaT_batch.imag).transpose(1, 0, 2)
+        Tb[:, :, :].real = rmi_b.dot(GhalfbT_batch.real).transpose(1, 0, 2)
+        Tb[:, :, :].imag = rmi_b.dot(GhalfbT_batch.imag).transpose(1, 0, 2)
 
-        exx += numpy.einsum("wij,wji->w",Ta,Ta,optimize=True) + numpy.einsum("wij,wji->w",Tb,Tb,optimize=True) 
+        exx += numpy.einsum("wij,wji->w", Ta, Ta, optimize=True) + numpy.einsum(
+            "wij,wji->w", Tb, Tb, optimize=True
+        )
 
     e2b = 0.5 * (ecoul - exx)
 
     return e2b
 
+
 nwalkers = 20
 
 nao = 200
 nocc = 50
 naux = nao * 4
-Ghalfa_batch = numpy.random.randn(nwalkers * nao * nocc).reshape(nwalkers, nocc,nao) + 1.j * numpy.random.randn(nwalkers * nao * nocc).reshape(nwalkers, nocc, nao)
-Ghalfb_batch = numpy.random.randn(nwalkers * nao * nocc).reshape(nwalkers, nocc,nao)+ 1.j * numpy.random.randn(nwalkers * nao * nocc).reshape(nwalkers, nocc, nao)
-rchola = numpy.random.randn(naux * nao * nocc).reshape(naux, nocc*nao)
-rcholb = numpy.random.randn(naux * nao * nocc).reshape(naux, nocc*nao)
+Ghalfa_batch = numpy.random.randn(nwalkers * nao * nocc).reshape(
+    nwalkers, nocc, nao
+) + 1.0j * numpy.random.randn(nwalkers * nao * nocc).reshape(nwalkers, nocc, nao)
+Ghalfb_batch = numpy.random.randn(nwalkers * nao * nocc).reshape(
+    nwalkers, nocc, nao
+) + 1.0j * numpy.random.randn(nwalkers * nao * nocc).reshape(nwalkers, nocc, nao)
+rchola = numpy.random.randn(naux * nao * nocc).reshape(naux, nocc * nao)
+rcholb = numpy.random.randn(naux * nao * nocc).reshape(naux, nocc * nao)
 energies = []
 
 start = time.time()
-for iw in range (nwalkers):
-    energies += [local_energy_generic_cholesky_opt(Ghalfa_batch[iw], Ghalfb_batch[iw], rchola, rcholb)]
+for iw in range(nwalkers):
+    energies += [
+        local_energy_generic_cholesky_opt(Ghalfa_batch[iw], Ghalfb_batch[iw], rchola, rcholb)
+    ]
 energies = numpy.complex128(energies)
-print("Current algorithm = {}".format(time.time()-start))
+print("Current algorithm = {}".format(time.time() - start))
 
 start = time.time()
 energies_batch = local_energy_generic_batch(Ghalfa_batch, Ghalfb_batch, rchola, rcholb)
-print("batch algorithm = {}".format(time.time()-start))
-
-assert(numpy.allclose(energies,energies_batch))
-
-
-
+print("batch algorithm = {}".format(time.time() - start))
 
+assert numpy.allclose(energies, energies_batch)
diff --git a/timing_scripts/local_energy_GPU.py b/timing_scripts/local_energy_GPU.py
index 7bb57476..3a1a5294 100644
--- a/timing_scripts/local_energy_GPU.py
+++ b/timing_scripts/local_energy_GPU.py
@@ -1,6 +1,6 @@
 import os
 
-os.environ['MKL_NUM_THREADS'] = '1'
+os.environ["MKL_NUM_THREADS"] = "1"
 import math
 import time
 import timeit
@@ -10,8 +10,9 @@
 from mpi4py import MPI
 from numba import cuda
 
-os.environ["OMP_NUM_THREADS"] = '1'
-os.environ["MKL_NUM_THREADS"] = '1'
+os.environ["OMP_NUM_THREADS"] = "1"
+os.environ["MKL_NUM_THREADS"] = "1"
+
 
 def local_energy_generic_cholesky_opt(Ghalfa, Ghalfb, rchola, rcholb):
     # Element wise multiplication.
@@ -19,33 +20,34 @@ def local_energy_generic_cholesky_opt(Ghalfa, Ghalfb, rchola, rcholb):
     nbeta = Ghalfb.shape[0]
     nbasis = Ghalfa.shape[-1]
 
-    Xa = rchola.dot(Ghalfa.real.ravel()) + 1.j * rchola.dot(Ghalfa.imag.ravel())
-    Xb = rcholb.dot(Ghalfb.real.ravel()) + 1.j * rcholb.dot(Ghalfb.imag.ravel())
+    Xa = rchola.dot(Ghalfa.real.ravel()) + 1.0j * rchola.dot(Ghalfa.imag.ravel())
+    Xb = rcholb.dot(Ghalfb.real.ravel()) + 1.0j * rcholb.dot(Ghalfb.imag.ravel())
 
-    ecoul = cupy.dot(Xa,Xa)
-    ecoul += cupy.dot(Xb,Xb)
-    ecoul += 2*cupy.dot(Xa,Xb)
+    ecoul = cupy.dot(Xa, Xa)
+    ecoul += cupy.dot(Xb, Xb)
+    ecoul += 2 * cupy.dot(Xa, Xb)
 
     GhalfaT = Ghalfa.T.copy()
-    GhalfbT = Ghalfb.T.copy() # nbasis x nocc
+    GhalfbT = Ghalfb.T.copy()  # nbasis x nocc
 
-    Ta = cupy.zeros((nalpha,nalpha), dtype=numpy.complex128)
-    Tb = cupy.zeros((nbeta,nbeta), dtype=numpy.complex128)
+    Ta = cupy.zeros((nalpha, nalpha), dtype=numpy.complex128)
+    Tb = cupy.zeros((nbeta, nbeta), dtype=numpy.complex128)
 
-    exx  = 0.j  # we will iterate over cholesky index to update Ex energy for alpha and beta
+    exx = 0.0j  # we will iterate over cholesky index to update Ex energy for alpha and beta
     for x in range(naux):  # write a cython function that calls blas for this.
-        rmi_a = rchola[x].reshape((nalpha,nbasis))
-        rmi_b = rcholb[x].reshape((nbeta,nbasis))
-        Ta[:,:].real = rmi_a.dot(GhalfaT.real)
-        Ta[:,:].imag = rmi_a.dot(GhalfaT.imag)  # this is a (nalpha, nalpha)
-        Tb[:,:].real = rmi_b.dot(GhalfbT.real)
-        Tb[:,:].imag = rmi_b.dot(GhalfbT.imag) # this is (nbeta, nbeta)
+        rmi_a = rchola[x].reshape((nalpha, nbasis))
+        rmi_b = rcholb[x].reshape((nbeta, nbasis))
+        Ta[:, :].real = rmi_a.dot(GhalfaT.real)
+        Ta[:, :].imag = rmi_a.dot(GhalfaT.imag)  # this is a (nalpha, nalpha)
+        Tb[:, :].real = rmi_b.dot(GhalfbT.real)
+        Tb[:, :].imag = rmi_b.dot(GhalfbT.imag)  # this is (nbeta, nbeta)
         exx += cupy.trace(Ta.dot(Ta)) + numpy.trace(Tb.dot(Tb))
 
     e2b = 0.5 * (ecoul - exx)
 
     return e2b
 
+
 def local_energy_generic_batch_old(Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
     # Element wise multiplication.
     nwalkers = Ghalfa_batch.shape[0]
@@ -53,45 +55,50 @@ def local_energy_generic_batch_old(Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
     nbeta = Ghalfb_batch.shape[1]
     nbasis = Ghalfa_batch.shape[-1]
 
-    Ghalfa_batch = Ghalfa_batch.reshape(nwalkers, nalpha*nbasis)
-    Ghalfb_batch = Ghalfb_batch.reshape(nwalkers, nbeta*nbasis)
+    Ghalfa_batch = Ghalfa_batch.reshape(nwalkers, nalpha * nbasis)
+    Ghalfb_batch = Ghalfb_batch.reshape(nwalkers, nbeta * nbasis)
 
-    Xa = rchola.dot(Ghalfa_batch.real.T) + 1.j * rchola.dot(Ghalfa_batch.imag.T) # naux x nwalkers
-    Xb = rcholb.dot(Ghalfb_batch.real.T) + 1.j * rcholb.dot(Ghalfb_batch.imag.T) # naux x nwalkers
+    Xa = rchola.dot(Ghalfa_batch.real.T) + 1.0j * rchola.dot(Ghalfa_batch.imag.T)  # naux x nwalkers
+    Xb = rcholb.dot(Ghalfb_batch.real.T) + 1.0j * rcholb.dot(Ghalfb_batch.imag.T)  # naux x nwalkers
 
     ecoul = cupy.einsum("xw,xw->w", Xa, Xa, optimize=True)
     ecoul += cupy.einsum("xw,xw->w", Xb, Xb, optimize=True)
-    ecoul += 2. * cupy.einsum("xw,xw->w", Xa, Xb, optimize=True)
+    ecoul += 2.0 * cupy.einsum("xw,xw->w", Xa, Xb, optimize=True)
 
     Ghalfa_batch = Ghalfa_batch.reshape(nwalkers, nalpha, nbasis)
     Ghalfb_batch = Ghalfb_batch.reshape(nwalkers, nbeta, nbasis)
 
-    GhalfaT_batch = Ghalfa_batch.transpose(0,2,1).copy() # nw x nbasis x nocc
-    GhalfbT_batch = Ghalfb_batch.transpose(0,2,1).copy() # nw x nbasis x nocc
+    GhalfaT_batch = Ghalfa_batch.transpose(0, 2, 1).copy()  # nw x nbasis x nocc
+    GhalfbT_batch = Ghalfb_batch.transpose(0, 2, 1).copy()  # nw x nbasis x nocc
 
     GhalfaT_batch_real = GhalfaT_batch.real.copy()
     GhalfaT_batch_imag = GhalfaT_batch.imag.copy()
     GhalfbT_batch_real = GhalfbT_batch.real.copy()
     GhalfbT_batch_imag = GhalfbT_batch.imag.copy()
 
-    Ta = cupy.zeros((nwalkers, nalpha,nalpha), dtype=numpy.complex128)
-    Tb = cupy.zeros((nwalkers, nbeta,nbeta), dtype=numpy.complex128)
+    Ta = cupy.zeros((nwalkers, nalpha, nalpha), dtype=numpy.complex128)
+    Tb = cupy.zeros((nwalkers, nbeta, nbeta), dtype=numpy.complex128)
 
-    exx  = cupy.zeros(nwalkers, dtype=numpy.complex128)  # we will iterate over cholesky index to update Ex energy for alpha and beta
+    exx = cupy.zeros(
+        nwalkers, dtype=numpy.complex128
+    )  # we will iterate over cholesky index to update Ex energy for alpha and beta
     for x in range(naux):  # write a cython function that calls blas for this.
-        rmi_a = rchola[x].reshape((nalpha,nbasis))
-        rmi_b = rcholb[x].reshape((nbeta,nbasis))
-        Ta[:,:,:].real = rmi_a.dot(GhalfaT_batch_real).transpose(1,0,2)
-        Ta[:,:,:].imag = rmi_a.dot(GhalfaT_batch_imag).transpose(1,0,2)
-        Tb[:,:,:].real = rmi_b.dot(GhalfbT_batch_real).transpose(1,0,2)
-        Tb[:,:,:].imag = rmi_b.dot(GhalfbT_batch_imag).transpose(1,0,2)
+        rmi_a = rchola[x].reshape((nalpha, nbasis))
+        rmi_b = rcholb[x].reshape((nbeta, nbasis))
+        Ta[:, :, :].real = rmi_a.dot(GhalfaT_batch_real).transpose(1, 0, 2)
+        Ta[:, :, :].imag = rmi_a.dot(GhalfaT_batch_imag).transpose(1, 0, 2)
+        Tb[:, :, :].real = rmi_b.dot(GhalfbT_batch_real).transpose(1, 0, 2)
+        Tb[:, :, :].imag = rmi_b.dot(GhalfbT_batch_imag).transpose(1, 0, 2)
 
-        exx += cupy.einsum("wij,wji->w",Ta,Ta,optimize=True) + cupy.einsum("wij,wji->w",Tb,Tb,optimize=True)
+        exx += cupy.einsum("wij,wji->w", Ta, Ta, optimize=True) + cupy.einsum(
+            "wij,wji->w", Tb, Tb, optimize=True
+        )
 
     e2b = 0.5 * (ecoul - exx)
 
     return e2b
 
+
 def local_energy_generic_batch(Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
     # Element wise multiplication.
     nwalkers = Ghalfa_batch.shape[0]
@@ -99,40 +106,45 @@ def local_energy_generic_batch(Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
     nbeta = Ghalfb_batch.shape[1]
     nbasis = Ghalfa_batch.shape[-1]
 
-    Ghalfa_batch = Ghalfa_batch.reshape(nwalkers, nalpha*nbasis)
-    Ghalfb_batch = Ghalfb_batch.reshape(nwalkers, nbeta*nbasis)
+    Ghalfa_batch = Ghalfa_batch.reshape(nwalkers, nalpha * nbasis)
+    Ghalfb_batch = Ghalfb_batch.reshape(nwalkers, nbeta * nbasis)
 
-    Xa = rchola.dot(Ghalfa_batch.real.T) + 1.j * rchola.dot(Ghalfa_batch.imag.T) # naux x nwalkers
-    Xb = rcholb.dot(Ghalfb_batch.real.T) + 1.j * rcholb.dot(Ghalfb_batch.imag.T) # naux x nwalkers
+    Xa = rchola.dot(Ghalfa_batch.real.T) + 1.0j * rchola.dot(Ghalfa_batch.imag.T)  # naux x nwalkers
+    Xb = rcholb.dot(Ghalfb_batch.real.T) + 1.0j * rcholb.dot(Ghalfb_batch.imag.T)  # naux x nwalkers
 
     ecoul = cupy.einsum("xw,xw->w", Xa, Xa, optimize=True)
     ecoul += cupy.einsum("xw,xw->w", Xb, Xb, optimize=True)
-    ecoul += 2. * cupy.einsum("xw,xw->w", Xa, Xb, optimize=True)
+    ecoul += 2.0 * cupy.einsum("xw,xw->w", Xa, Xb, optimize=True)
 
     Ghalfa_batch = Ghalfa_batch.reshape(nwalkers, nalpha, nbasis)
     Ghalfb_batch = Ghalfb_batch.reshape(nwalkers, nbeta, nbasis)
 
-    #Ghalfa_batch_real = Ghalfa_batch.real.copy()
-    #Ghalfa_batch_imag = Ghalfa_batch.imag.copy()
-    #Ghalfb_batch_real = Ghalfb_batch.real.copy()
-    #Ghalfb_batch_imag = Ghalfb_batch.imag.copy()
+    # Ghalfa_batch_real = Ghalfa_batch.real.copy()
+    # Ghalfa_batch_imag = Ghalfa_batch.imag.copy()
+    # Ghalfb_batch_real = Ghalfb_batch.real.copy()
+    # Ghalfb_batch_imag = Ghalfb_batch.imag.copy()
 
-    Ta = cupy.zeros((nwalkers, nalpha,nalpha), dtype=numpy.complex128)
-    Tb = cupy.zeros((nwalkers, nbeta,nbeta), dtype=numpy.complex128)
+    Ta = cupy.zeros((nwalkers, nalpha, nalpha), dtype=numpy.complex128)
+    Tb = cupy.zeros((nwalkers, nbeta, nbeta), dtype=numpy.complex128)
 
-    exx  = cupy.zeros(nwalkers, dtype=numpy.complex128)  # we will iterate over cholesky index to update Ex energy for alpha and beta
+    exx = cupy.zeros(
+        nwalkers, dtype=numpy.complex128
+    )  # we will iterate over cholesky index to update Ex energy for alpha and beta
     for x in range(naux):  # write a cython function that calls blas for this.
-        rmi_a = rchola[x].reshape((nalpha,nbasis))
-        rmi_b = rcholb[x].reshape((nbeta,nbasis))
+        rmi_a = rchola[x].reshape((nalpha, nbasis))
+        rmi_b = rcholb[x].reshape((nbeta, nbasis))
         Ta = Ghalfa_batch @ rmi_a.T
         Tb = Ghalfb_batch @ rmi_b.T
 
-        exx += cupy.einsum("wij,wji->w",Ta,Ta,optimize=True) + cupy.einsum("wij,wji->w",Tb,Tb,optimize=True)
+        exx += cupy.einsum("wij,wji->w", Ta, Ta, optimize=True) + cupy.einsum(
+            "wij,wji->w", Tb, Tb, optimize=True
+        )
 
     e2b = 0.5 * (ecoul - exx)
 
     return e2b
 
+
 @cuda.jit
 def exx_numba(exx_chol, Ta, Tb, Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
     nwalkers = Ghalfa_batch.shape[0]
@@ -144,18 +156,19 @@ def exx_numba(exx_chol, Ta, Tb, Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
     pos1 = pos // nchol
     pos2 = pos - pos1 * nchol
 
-    if (pos1 < nwalkers and pos2 < nchol):
+    if pos1 < nwalkers and pos2 < nchol:
         rmi_a = rchola[pos2]
         rmi_b = rcholb[pos2]
         Ta = rmi_a.dot(Ghalfa_batch[pos1])
         Tb = rmi_b.dot(Ghalfb_batch[pos1])
-        #for i in range(nalpha):
+        # for i in range(nalpha):
         #    for j in range(nalpha):
         #        exx_chol[pos1,pos2] = Ta[i,j]*Ta[j,i]
-        #for i in range(nalpha):
+        # for i in range(nalpha):
         #    for j in range(nalpha):
         #        exx_chol[pos1,pos2] = Tb[i,j]*Tb[j,i]
 
+
 def local_energy_generic_numba(Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
     # Element wise multiplication.
     nwalkers = Ghalfa_batch.shape[0]
@@ -164,20 +177,20 @@ def local_energy_generic_numba(Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
     nbasis = Ghalfa_batch.shape[-1]
     nchol = rchola.shape[0]
 
-    Ghalfa_batch = Ghalfa_batch.reshape(nwalkers, nalpha*nbasis)
-    Ghalfb_batch = Ghalfb_batch.reshape(nwalkers, nbeta*nbasis)
+    Ghalfa_batch = Ghalfa_batch.reshape(nwalkers, nalpha * nbasis)
+    Ghalfb_batch = Ghalfb_batch.reshape(nwalkers, nbeta * nbasis)
 
-    Xa = rchola.dot(Ghalfa_batch.real.T) + 1.j * rchola.dot(Ghalfa_batch.imag.T) # naux x nwalkers
-    Xb = rcholb.dot(Ghalfb_batch.real.T) + 1.j * rcholb.dot(Ghalfb_batch.imag.T) # naux x nwalkers
+    Xa = rchola.dot(Ghalfa_batch.real.T) + 1.0j * rchola.dot(Ghalfa_batch.imag.T)  # naux x nwalkers
+    Xb = rcholb.dot(Ghalfb_batch.real.T) + 1.0j * rcholb.dot(Ghalfb_batch.imag.T)  # naux x nwalkers
 
     ecoul = cupy.einsum("xw,xw->w", Xa, Xa, optimize=True)
     ecoul += cupy.einsum("xw,xw->w", Xb, Xb, optimize=True)
-    ecoul += 2. * cupy.einsum("xw,xw->w", Xa, Xb, optimize=True)
+    ecoul += 2.0 * cupy.einsum("xw,xw->w", Xa, Xb, optimize=True)
 
     Ghalfa_batch = Ghalfa_batch.reshape(nwalkers, nalpha, nbasis)
     Ghalfb_batch = Ghalfb_batch.reshape(nwalkers, nbeta, nbasis)
-    Ta = cupy.zeros((nwalkers, nalpha,nalpha), dtype=numpy.float64)
-    Tb = cupy.zeros((nwalkers, nbeta,nbeta), dtype=numpy.float64)
+    Ta = cupy.zeros((nwalkers, nalpha, nalpha), dtype=numpy.float64)
+    Tb = cupy.zeros((nwalkers, nbeta, nbeta), dtype=numpy.float64)
 
     Ghalfa_batch_real = Ghalfa_batch.real.copy()
     Ghalfa_batch_imag = Ghalfa_batch.imag.copy()
@@ -187,27 +200,32 @@ def local_energy_generic_numba(Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
     rchola = rchola.reshape(nchol, nalpha, nbasis)
     rcholb = rcholb.reshape(nchol, nbeta, nbasis)
 
-    exx_chol  = cupy.zeros((nwalkers, nchol), dtype=numpy.float64)  # we will iterate over cholesky index to update Ex energy for alpha and beta
+    exx_chol = cupy.zeros(
+        (nwalkers, nchol), dtype=numpy.float64
+    )  # we will iterate over cholesky index to update Ex energy for alpha and beta
     threadsperblock = 512
-    blockspergrid = math.ceil(nchol*nwalkers / threadsperblock)
-    exx_numba[blockspergrid, threadsperblock](exx_chol, Ta, Tb, Ghalfa_batch_real, Ghalfb_batch_real, rchola, rcholb)
-    exx = cupy.einsum("wx->w",exx_chol)
-    #nchol = rchola.shape[0]
-    #rchola = rchola.reshape(nchol, nalpha, nbasis)
-    #rcholb = rcholb.reshape(nchol, nbeta, nbasis)
-
-    #Txij = cupy.einsum("xim,wjm->wxji", rchola, Ghalfa_batch)
-    #exx = cupy.einsum("wxji,wxij->w",Txij,Txij)
-    #Txij = cupy.einsum("xim,wjm->wxji", rcholb, Ghalfb_batch)
-    #exx += cupy.einsum("wxji,wxij->w",Txij,Txij)
-
-    #exx = cupy.einsum("xim,xjn,win,wjm->w",rchola, rchola, Ghalfa_batch, Ghalfa_batch, optimize=True)\
+    blockspergrid = math.ceil(nchol * nwalkers / threadsperblock)
+    exx_numba[blockspergrid, threadsperblock](
+        exx_chol, Ta, Tb, Ghalfa_batch_real, Ghalfb_batch_real, rchola, rcholb
+    )
+    exx = cupy.einsum("wx->w", exx_chol)
+    # nchol = rchola.shape[0]
+    # rchola = rchola.reshape(nchol, nalpha, nbasis)
+    # rcholb = rcholb.reshape(nchol, nbeta, nbasis)
+
+    # Txij = cupy.einsum("xim,wjm->wxji", rchola, Ghalfa_batch)
+    # exx = cupy.einsum("wxji,wxij->w",Txij,Txij)
+    # Txij = cupy.einsum("xim,wjm->wxji", rcholb, Ghalfb_batch)
+    # exx += cupy.einsum("wxji,wxij->w",Txij,Txij)
+
+    # exx = cupy.einsum("xim,xjn,win,wjm->w",rchola, rchola, Ghalfa_batch, Ghalfa_batch, optimize=True)\
     #    + cupy.einsum("xim,xjn,win,wjm->w",rcholb, rcholb, Ghalfb_batch, Ghalfb_batch, optimize=True)
 
     e2b = 0.5 * (ecoul - exx)
 
     return e2b
 
+
 divide = 5
 nao = 439 // divide
 nocca = 94 // divide
@@ -216,10 +234,14 @@ def local_energy_generic_numba(Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
 nwalkers = 50
 nblocks = 5
 
-Ghalfa_batch = numpy.random.randn(nwalkers * nao * nocca).reshape(nwalkers, nocca,nao) + 1.j * numpy.random.randn(nwalkers * nao * nocca).reshape(nwalkers, nocca, nao)
-Ghalfb_batch = numpy.random.randn(nwalkers * nao * noccb).reshape(nwalkers, noccb,nao)+ 1.j * numpy.random.randn(nwalkers * nao * noccb).reshape(nwalkers, noccb, nao)
-rchola = numpy.random.randn(naux * nao * nocca).reshape(naux, nocca*nao)
-rcholb = numpy.random.randn(naux * nao * noccb).reshape(naux, noccb*nao)
+Ghalfa_batch = numpy.random.randn(nwalkers * nao * nocca).reshape(
+    nwalkers, nocca, nao
+) + 1.0j * numpy.random.randn(nwalkers * nao * nocca).reshape(nwalkers, nocca, nao)
+Ghalfb_batch = numpy.random.randn(nwalkers * nao * noccb).reshape(
+    nwalkers, noccb, nao
+) + 1.0j * numpy.random.randn(nwalkers * nao * noccb).reshape(nwalkers, noccb, nao)
+rchola = numpy.random.randn(naux * nao * nocca).reshape(naux, nocca * nao)
+rcholb = numpy.random.randn(naux * nao * noccb).reshape(naux, noccb * nao)
 
 comm = MPI.COMM_WORLD
 size = comm.Get_size()
@@ -228,25 +250,25 @@ def local_energy_generic_numba(Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
 nalpha = nocca
 nbeta = noccb
 nbasis = nao
-#Ghalfa_batch = Ghalfa_batch.reshape(nwalkers, nalpha, nbasis)
-#Ghalfb_batch = Ghalfb_batch.reshape(nwalkers, nbeta, nbasis)
-#rchola = rchola.reshape(nchol, nalpha, nbasis)
-#rcholb = rcholb.reshape(nchol, nbeta, nbasis)
-#path_info = numpy.einsum_path("xim,xjn,win,wjm->w",rcholb, rcholb, Ghalfb_batch, Ghalfb_batch, optimize='greedy')
-#print(path_info[0])
-#print(path_info[1])
-#path_info = numpy.einsum_path("xim,wjm->wxij",rcholb, Ghalfb_batch, optimize='greedy')
-#print(path_info[0])
-#print(path_info[1])
-#Txij = numpy.zeros((nwalkers,nchol,noccb,noccb))
-#path_info = numpy.einsum_path("wxij,wxij->w",Txij,Txij, optimize='greedy')
-#print(path_info[0])
-#print(path_info[1])
-#Txij = numpy.zeros((nwalkers,nchol,noccb,noccb))
-#print(Txij.size)
-#exit()
+# Ghalfa_batch = Ghalfa_batch.reshape(nwalkers, nalpha, nbasis)
+# Ghalfb_batch = Ghalfb_batch.reshape(nwalkers, nbeta, nbasis)
+# rchola = rchola.reshape(nchol, nalpha, nbasis)
+# rcholb = rcholb.reshape(nchol, nbeta, nbasis)
+# path_info = numpy.einsum_path("xim,xjn,win,wjm->w",rcholb, rcholb, Ghalfb_batch, Ghalfb_batch, optimize='greedy')
+# print(path_info[0])
+# print(path_info[1])
+# path_info = numpy.einsum_path("xim,wjm->wxij",rcholb, Ghalfb_batch, optimize='greedy')
+# print(path_info[0])
+# print(path_info[1])
+# Txij = numpy.zeros((nwalkers,nchol,noccb,noccb))
+# path_info = numpy.einsum_path("wxij,wxij->w",Txij,Txij, optimize='greedy')
+# print(path_info[0])
+# print(path_info[1])
+# Txij = numpy.zeros((nwalkers,nchol,noccb,noccb))
+# print(Txij.size)
+# exit()
 with cupy.cuda.Device(rank):
-    mat = cupy.array(numpy.random.rand(2,2))
+    mat = cupy.array(numpy.random.rand(2, 2))
     warmup = cupy.einsum("ab,bc->ac", mat, mat, optimize=True)
 
     Ghalfa_batch = cupy.asarray(Ghalfa_batch, dtype=cupy.complex128)
@@ -254,27 +276,23 @@ def local_energy_generic_numba(Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
     rchola = cupy.asarray(rchola, dtype=cupy.float64)
     rcholb = cupy.asarray(rcholb, dtype=cupy.float64)
 
-    #energies_batch = local_energy_generic_batch_old(Ghalfa_batch, Ghalfb_batch, rchola, rcholb)
-    #start = time.time()
-    #for i in range (nblocks):
+    # energies_batch = local_energy_generic_batch_old(Ghalfa_batch, Ghalfb_batch, rchola, rcholb)
+    # start = time.time()
+    # for i in range (nblocks):
     #    energies_batch = local_energy_generic_batch_old(Ghalfa_batch, Ghalfb_batch, rchola, rcholb)
-    #print("Current algorithm = {}".format(time.time()-start))
+    # print("Current algorithm = {}".format(time.time()-start))
 
-    #energies_batch2 = local_energy_generic_batch(Ghalfa_batch, Ghalfb_batch, rchola, rcholb)
-    #start = time.time()
-    #for i in range (nblocks):
+    # energies_batch2 = local_energy_generic_batch(Ghalfa_batch, Ghalfb_batch, rchola, rcholb)
+    # start = time.time()
+    # for i in range (nblocks):
     #    energies_batch2 = local_energy_generic_batch(Ghalfa_batch, Ghalfb_batch, rchola, rcholb)
-    #print("new batch algorithm = {}".format(time.time()-start))
+    # print("new batch algorithm = {}".format(time.time()-start))
 
     energies_batch3 = local_energy_generic_numba(Ghalfa_batch, Ghalfb_batch, rchola, rcholb)
     start = time.time()
-    for i in range (nblocks):
+    for i in range(nblocks):
         energies_batch3 = local_energy_generic_numba(Ghalfa_batch, Ghalfb_batch, rchola, rcholb)
-    print("new einsum algorithm = {}".format(time.time()-start))
-
-    assert(cupy.allclose(energies_batch,energies_batch2))
-    assert(cupy.allclose(energies_batch3,energies_batch2))
-
-
-
+    print("new einsum algorithm = {}".format(time.time() - start))
 
+    assert cupy.allclose(energies_batch, energies_batch2)
+    assert cupy.allclose(energies_batch3, energies_batch2)
diff --git a/timing_scripts/local_energy_exx_CPU.py b/timing_scripts/local_energy_exx_CPU.py
index 83cf516d..3761550c 100644
--- a/timing_scripts/local_energy_exx_CPU.py
+++ b/timing_scripts/local_energy_exx_CPU.py
@@ -2,15 +2,17 @@
 
 from line_profiler import LineProfiler
 
-os.environ['MKL_NUM_THREADS'] = '1'
+os.environ["MKL_NUM_THREADS"] = "1"
 import time
 import timeit
 
 import numpy
 
 from ipie.estimators.opt_local_energy import (
-    local_energy_generic_cholesky_exx, local_energy_generic_cholesky_exx_batch,
-    local_energy_generic_cholesky_exx_rhf_batch)
+    local_energy_generic_cholesky_exx,
+    local_energy_generic_cholesky_exx_batch,
+    local_energy_generic_cholesky_exx_rhf_batch,
+)
 
 
 def local_energy_generic_cholesky_opt(Ghalfa, Ghalfb, rchola, rcholb):
@@ -20,19 +22,19 @@ def local_energy_generic_cholesky_opt(Ghalfa, Ghalfb, rchola, rcholb):
     nbasis = Ghalfa.shape[-1]
 
     GhalfaT = Ghalfa.T.copy()
-    GhalfbT = Ghalfb.T.copy() # nbasis x nocc
-    
-    Ta = numpy.zeros((nalpha,nalpha), dtype=numpy.complex128)
-    Tb = numpy.zeros((nbeta,nbeta), dtype=numpy.complex128)
+    GhalfbT = Ghalfb.T.copy()  # nbasis x nocc
 
-    exx  = 0.j  # we will iterate over cholesky index to update Ex energy for alpha and beta
+    Ta = numpy.zeros((nalpha, nalpha), dtype=numpy.complex128)
+    Tb = numpy.zeros((nbeta, nbeta), dtype=numpy.complex128)
+
+    exx = 0.0j  # we will iterate over cholesky index to update Ex energy for alpha and beta
     for x in range(naux):  # write a cython function that calls blas for this.
-        rmi_a = rchola[x].reshape((nalpha,nbasis))
-        rmi_b = rcholb[x].reshape((nbeta,nbasis))
-        Ta[:,:].real = rmi_a.dot(GhalfaT.real) 
-        Ta[:,:].imag = rmi_a.dot(GhalfaT.imag)  # this is a (nalpha, nalpha)
-        Tb[:,:].real = rmi_b.dot(GhalfbT.real) 
-        Tb[:,:].imag = rmi_b.dot(GhalfbT.imag) # this is (nbeta, nbeta)
+        rmi_a = rchola[x].reshape((nalpha, nbasis))
+        rmi_b = rcholb[x].reshape((nbeta, nbasis))
+        Ta[:, :].real = rmi_a.dot(GhalfaT.real)
+        Ta[:, :].imag = rmi_a.dot(GhalfaT.imag)  # this is a (nalpha, nalpha)
+        Tb[:, :].real = rmi_b.dot(GhalfbT.real)
+        Tb[:, :].imag = rmi_b.dot(GhalfbT.imag)  # this is (nbeta, nbeta)
         exx += numpy.trace(Ta.dot(Ta)) + numpy.trace(Tb.dot(Tb))
 
     # e2b = 0.5 * (ecoul - exx)
@@ -40,6 +42,7 @@ def local_energy_generic_cholesky_opt(Ghalfa, Ghalfb, rchola, rcholb):
 
     return e2b
 
+
 def local_energy_generic_cholesky_opt_cubic_rhf(Ghalfa, Ghalfb, rchola, rcholb):
     nalpha = Ghalfa.shape[0]
     nbeta = Ghalfb.shape[0]
@@ -47,28 +50,29 @@ def local_energy_generic_cholesky_opt_cubic_rhf(Ghalfa, Ghalfb, rchola, rcholb):
 
     GhalfaT = Ghalfa.T.copy()
     # GhalfbT = Ghalfb.T.copy() # nbasis x nocc
-    
-    Ta = numpy.zeros((naux, nalpha,nalpha), dtype=numpy.complex128)
+
+    Ta = numpy.zeros((naux, nalpha, nalpha), dtype=numpy.complex128)
     # Tb = numpy.zeros((naux, nbeta,nbeta), dtype=numpy.complex128)
 
-    rchola = rchola.reshape((naux,nalpha,nbasis))
+    rchola = rchola.reshape((naux, nalpha, nbasis))
     # rcholb = rcholb.reshape((naux,nbeta,nbasis))
 
-    Ta.real = numpy.einsum("xim,mj->xij",rchola,GhalfaT.real, optimize=True)
-    Ta.imag = numpy.einsum("xim,mj->xij",rchola,GhalfaT.imag, optimize=True)
+    Ta.real = numpy.einsum("xim,mj->xij", rchola, GhalfaT.real, optimize=True)
+    Ta.imag = numpy.einsum("xim,mj->xij", rchola, GhalfaT.imag, optimize=True)
     # Tb.real = numpy.einsum("xim,mj->xij",rcholb,GhalfbT.real, optimize=True)
     # Tb.imag = numpy.einsum("xim,mj->xij",rcholb,GhalfbT.imag, optimize=True)
-    
+
     # exxa = numpy.tensordot(Ta, Ta, axes=((0,1,2),(0,2,1)))
     # exxb = numpy.tensordot(Tb, Tb, axes=((0,1,2),(0,2,1)))
-    exxa = numpy.einsum("xij,xji->",Ta, Ta, optimize=True)
+    exxa = numpy.einsum("xij,xji->", Ta, Ta, optimize=True)
     # exxb = numpy.einsum("xij,xji->",Tb, Tb, optimize=True)
 
-    rchola = rchola.reshape((naux,nalpha*nbasis))
+    rchola = rchola.reshape((naux, nalpha * nbasis))
     # rcholb = rcholb.reshape((naux,nbeta*nbasis))
 
     # return -0.5 *(exxa+exxb)
-    return -0.5 *(exxa)
+    return -0.5 * (exxa)
+
 
 def local_energy_generic_cholesky_opt_new(Ghalfa, Ghalfb, rchola, rcholb):
     # Element wise multiplication.
@@ -77,19 +81,19 @@ def local_energy_generic_cholesky_opt_new(Ghalfa, Ghalfb, rchola, rcholb):
     nbasis = Ghalfa.shape[-1]
 
     GhalfaT = Ghalfa.T.copy()
-    GhalfbT = Ghalfb.T.copy() # nbasis x nocc
-    
-    Ta = numpy.zeros((nalpha,nalpha), dtype=numpy.complex128)
-    Tb = numpy.zeros((nbeta,nbeta), dtype=numpy.complex128)
+    GhalfbT = Ghalfb.T.copy()  # nbasis x nocc
+
+    Ta = numpy.zeros((nalpha, nalpha), dtype=numpy.complex128)
+    Tb = numpy.zeros((nbeta, nbeta), dtype=numpy.complex128)
 
-    exx  = 0.j  # we will iterate over cholesky index to update Ex energy for alpha and beta
+    exx = 0.0j  # we will iterate over cholesky index to update Ex energy for alpha and beta
     for x in range(naux):  # write a cython function that calls blas for this.
-        rmi_a = rchola[x].reshape((nalpha,nbasis))
-        rmi_b = rcholb[x].reshape((nbeta,nbasis))
-        Ta[:,:].real = rmi_a.dot(GhalfaT.real) 
-        Ta[:,:].imag = rmi_a.dot(GhalfaT.imag)  # this is a (nalpha, nalpha)
-        Tb[:,:].real = rmi_b.dot(GhalfbT.real) 
-        Tb[:,:].imag = rmi_b.dot(GhalfbT.imag) # this is (nbeta, nbeta)
+        rmi_a = rchola[x].reshape((nalpha, nbasis))
+        rmi_b = rcholb[x].reshape((nbeta, nbasis))
+        Ta[:, :].real = rmi_a.dot(GhalfaT.real)
+        Ta[:, :].imag = rmi_a.dot(GhalfaT.imag)  # this is a (nalpha, nalpha)
+        Tb[:, :].real = rmi_b.dot(GhalfbT.real)
+        Tb[:, :].imag = rmi_b.dot(GhalfbT.imag)  # this is (nbeta, nbeta)
         exx += numpy.einsum("ij,ji->", Ta, Ta) + numpy.einsum("ij,ji->", Tb, Tb)
 
     # e2b = 0.5 * (ecoul - exx)
@@ -97,26 +101,28 @@ def local_energy_generic_cholesky_opt_new(Ghalfa, Ghalfb, rchola, rcholb):
 
     return e2b
 
+
 def local_energy_generic_cholesky_opt_rhf(Ghalfa, rchola):
     # Element wise multiplication.
     nalpha = Ghalfa.shape[0]
     nbasis = Ghalfa.shape[1]
 
     GhalfaT = Ghalfa.T.copy()
-    
-    Ta = numpy.zeros((nalpha,nalpha), dtype=numpy.complex128)
 
-    exx  = 0.j  # we will iterate over cholesky index to update Ex energy for alpha and beta
+    Ta = numpy.zeros((nalpha, nalpha), dtype=numpy.complex128)
+
+    exx = 0.0j  # we will iterate over cholesky index to update Ex energy for alpha and beta
     for rmi_a in rchola:
-        rmi_a = rmi_a.reshape((nalpha,nbasis))
-        Ta[:,:].real = rmi_a.dot(GhalfaT.real) 
-        Ta[:,:].imag = rmi_a.dot(GhalfaT.imag)  # this is a (nalpha, nalpha)
+        rmi_a = rmi_a.reshape((nalpha, nbasis))
+        Ta[:, :].real = rmi_a.dot(GhalfaT.real)
+        Ta[:, :].imag = rmi_a.dot(GhalfaT.imag)  # this is a (nalpha, nalpha)
         exx += numpy.einsum("ij,ji->", Ta, Ta) * 2.0
 
     e2b = -0.5 * exx
 
     return e2b
 
+
 def local_energy_generic_cholesky_opt_rhf_batch(Ghalfa_batch, rchola):
     # Element wise multiplication.
     nwalkers = Ghalfa_batch.shape[0]
@@ -125,22 +131,23 @@ def local_energy_generic_cholesky_opt_rhf_batch(Ghalfa_batch, rchola):
     nchol = rchola.shape[0]
 
     Ghalfa_batch = Ghalfa_batch.reshape(nwalkers, nalpha, nbasis)
-    GhalfaT_batch = Ghalfa_batch.transpose(0,2,1).copy() # nw x nbasis x nocc
+    GhalfaT_batch = Ghalfa_batch.transpose(0, 2, 1).copy()  # nw x nbasis x nocc
 
-    Ta = numpy.zeros((nalpha,nalpha), dtype=numpy.complex128)
+    Ta = numpy.zeros((nalpha, nalpha), dtype=numpy.complex128)
     exx = numpy.zeros((nwalkers), dtype=numpy.complex128)
 
     for x in range(nchol):
-        rmi_a = rchola[x].reshape((nalpha,nbasis))
+        rmi_a = rchola[x].reshape((nalpha, nbasis))
         for iw in range(nwalkers):
-            Ta[:,:].real = rmi_a.dot(GhalfaT_batch[iw].real)
-            Ta[:,:].imag = rmi_a.dot(GhalfaT_batch[iw].imag)
-            exx[iw] += 2.*numpy.einsum("ij,ji->",Ta,Ta)
+            Ta[:, :].real = rmi_a.dot(GhalfaT_batch[iw].real)
+            Ta[:, :].imag = rmi_a.dot(GhalfaT_batch[iw].imag)
+            exx[iw] += 2.0 * numpy.einsum("ij,ji->", Ta, Ta)
 
     e2b = -0.5 * exx
 
     return e2b
 
+
 def local_energy_generic_batch(Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
     # Element wise multiplication.
     nwalkers = Ghalfa_batch.shape[0]
@@ -151,38 +158,47 @@ def local_energy_generic_batch(Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
     Ghalfa_batch = Ghalfa_batch.reshape(nwalkers, nalpha, nbasis)
     Ghalfb_batch = Ghalfb_batch.reshape(nwalkers, nbeta, nbasis)
 
-    GhalfaT_batch = Ghalfa_batch.transpose(0,2,1).copy() # nw x nbasis x nocc
-    GhalfbT_batch = Ghalfb_batch.transpose(0,2,1).copy() # nw x nbasis x nocc
-    
-    Ta = numpy.zeros((nwalkers, nalpha,nalpha), dtype=numpy.complex128)
-    Tb = numpy.zeros((nwalkers, nbeta,nbeta), dtype=numpy.complex128)
+    GhalfaT_batch = Ghalfa_batch.transpose(0, 2, 1).copy()  # nw x nbasis x nocc
+    GhalfbT_batch = Ghalfb_batch.transpose(0, 2, 1).copy()  # nw x nbasis x nocc
+
+    Ta = numpy.zeros((nwalkers, nalpha, nalpha), dtype=numpy.complex128)
+    Tb = numpy.zeros((nwalkers, nbeta, nbeta), dtype=numpy.complex128)
 
-    exx  = numpy.zeros(nwalkers, dtype=numpy.complex128)  # we will iterate over cholesky index to update Ex energy for alpha and beta
+    exx = numpy.zeros(
+        nwalkers, dtype=numpy.complex128
+    )  # we will iterate over cholesky index to update Ex energy for alpha and beta
     for x in range(naux):  # write a cython function that calls blas for this.
-        rmi_a = rchola[x].reshape((nalpha,nbasis))
-        rmi_b = rcholb[x].reshape((nbeta,nbasis))
-        Ta[:,:,:].real = rmi_a.dot(GhalfaT_batch.real).transpose(1,0,2)
-        Ta[:,:,:].imag = rmi_a.dot(GhalfaT_batch.imag).transpose(1,0,2)
-        Tb[:,:,:].real = rmi_b.dot(GhalfbT_batch.real).transpose(1,0,2)
-        Tb[:,:,:].imag = rmi_b.dot(GhalfbT_batch.imag).transpose(1,0,2)
+        rmi_a = rchola[x].reshape((nalpha, nbasis))
+        rmi_b = rcholb[x].reshape((nbeta, nbasis))
+        Ta[:, :, :].real = rmi_a.dot(GhalfaT_batch.real).transpose(1, 0, 2)
+        Ta[:, :, :].imag = rmi_a.dot(GhalfaT_batch.imag).transpose(1, 0, 2)
+        Tb[:, :, :].real = rmi_b.dot(GhalfbT_batch.real).transpose(1, 0, 2)
+        Tb[:, :, :].imag = rmi_b.dot(GhalfbT_batch.imag).transpose(1, 0, 2)
 
-        exx += numpy.einsum("wij,wji->w",Ta,Ta,optimize=True) + numpy.einsum("wij,wji->w",Tb,Tb,optimize=True) 
+        exx += numpy.einsum("wij,wji->w", Ta, Ta, optimize=True) + numpy.einsum(
+            "wij,wji->w", Tb, Tb, optimize=True
+        )
 
     # e2b = 0.5 * (ecoul - exx)
     e2b = -0.5 * exx
 
     return e2b
 
+
 nwalkers = 50
 
 nmult = 1
 nao = 108 * nmult
 nocc = 15 * nmult
 naux = 693 * nmult
-Ghalfa_batch = numpy.random.randn(nwalkers * nao * nocc).reshape(nwalkers, nocc,nao) + 1.j * numpy.random.randn(nwalkers * nao * nocc).reshape(nwalkers, nocc, nao)
-Ghalfb_batch = numpy.random.randn(nwalkers * nao * nocc).reshape(nwalkers, nocc,nao)+ 1.j * numpy.random.randn(nwalkers * nao * nocc).reshape(nwalkers, nocc, nao)
-rchola = numpy.random.randn(naux * nao * nocc).reshape(naux, nocc*nao)
-rcholb = numpy.random.randn(naux * nao * nocc).reshape(naux, nocc*nao)
+Ghalfa_batch = numpy.random.randn(nwalkers * nao * nocc).reshape(
+    nwalkers, nocc, nao
+) + 1.0j * numpy.random.randn(nwalkers * nao * nocc).reshape(nwalkers, nocc, nao)
+Ghalfb_batch = numpy.random.randn(nwalkers * nao * nocc).reshape(
+    nwalkers, nocc, nao
+) + 1.0j * numpy.random.randn(nwalkers * nao * nocc).reshape(nwalkers, nocc, nao)
+rchola = numpy.random.randn(naux * nao * nocc).reshape(naux, nocc * nao)
+rcholb = numpy.random.randn(naux * nao * nocc).reshape(naux, nocc * nao)
 energies = []
 energies2 = []
 energies3 = []
@@ -203,16 +219,20 @@ def local_energy_generic_batch(Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
 # print("new algorithm = {}".format(time.time()-start))
 
 start = time.time()
-for iw in range (nwalkers):
+for iw in range(nwalkers):
     energies3 += [local_energy_generic_cholesky_opt_rhf(Ghalfa_batch[iw], rchola)]
 energies3 = numpy.complex128(energies3)
-print("new algorithm (RHF) = {}".format(time.time()-start))
+print("new algorithm (RHF) = {}".format(time.time() - start))
 
 start = time.time()
-for iw in range (nwalkers):
-    energies6 += [local_energy_generic_cholesky_opt_cubic_rhf(Ghalfa_batch[iw], Ghalfb_batch[iw], rchola, rcholb)]
+for iw in range(nwalkers):
+    energies6 += [
+        local_energy_generic_cholesky_opt_cubic_rhf(
+            Ghalfa_batch[iw], Ghalfb_batch[iw], rchola, rcholb
+        )
+    ]
 energies6 = numpy.complex128(energies6)
-print("cubic algorithm (RHF) = {}".format(time.time()-start))
+print("cubic algorithm (RHF) = {}".format(time.time() - start))
 
 # start = time.time()
 # energies4 = local_energy_generic_cholesky_opt_rhf_batch(Ghalfa_batch, Ghalfb_batch, rchola, rcholb)
@@ -220,12 +240,12 @@ def local_energy_generic_batch(Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
 # print("new algorithm batch (RHF) = {}".format(time.time()-start))
 
 start = time.time()
-rchola = rchola.reshape(naux,nocc,nao)
-GhalfaT_batch = Ghalfa_batch.transpose(0,2,1).copy()
+rchola = rchola.reshape(naux, nocc, nao)
+GhalfaT_batch = Ghalfa_batch.transpose(0, 2, 1).copy()
 
 energies5 = local_energy_generic_cholesky_exx_rhf_batch(GhalfaT_batch, rchola)
 energies5 = numpy.complex128(energies5)
-print("new algorithm batch (RHF, Cython) = {}".format(time.time()-start))
+print("new algorithm batch (RHF, Cython) = {}".format(time.time() - start))
 
 # assert(numpy.allclose(energies,energies2))
 # assert(numpy.allclose(energies3,energies4))
@@ -238,13 +258,13 @@ def local_energy_generic_batch(Ghalfa_batch, Ghalfb_batch, rchola, rcholb):
 pr.enable()
 
 start = time.time()
-for iw in range (nwalkers):
+for iw in range(nwalkers):
     energies3 += [local_energy_generic_cholesky_opt_rhf(Ghalfa_batch[iw], rchola)]
 # lp = LineProfiler()
 # lp_wrapper = lp(local_energy_generic_cholesky_opt_rhf_batch)
 # lp_wrapper(Ghalfa_batch, rchola)
 # lp.print_stats()
-print("local_energy_generic_cholesky_opt_rhf profiled = {}".format(time.time()-start))
+print("local_energy_generic_cholesky_opt_rhf profiled = {}".format(time.time() - start))
 
 pr.disable()
-pr.print_stats(sort='tottime')
+pr.print_stats(sort="tottime")
diff --git a/timing_scripts/local_energy_kernel.py b/timing_scripts/local_energy_kernel.py
index bda5f0a7..bac89248 100644
--- a/timing_scripts/local_energy_kernel.py
+++ b/timing_scripts/local_energy_kernel.py
@@ -7,20 +7,21 @@
 
 BLOCK_SIZE = 512
 
+
 def current_gpu(rchola, rcholb, Ghalfa, Ghalfb):
     nwalkers = Ghalfa.shape[0]
     nalpha = Ghalfa.shape[1]
     nbasis = Ghalfa.shape[2]
     nchol = rchola.shape[0]
-    _Ghalfa = Ghalfa.reshape(nwalkers, nalpha*nbasis)
-    _Ghalfb = Ghalfb.reshape(nwalkers, nalpha*nbasis)
-    _rchola = rcholb.reshape(nchol, nalpha*nbasis)
-    _rcholb = rcholb.reshape(nchol, nalpha*nbasis)
-    Xa = _rchola.dot(_Ghalfa.real.T) + 1.j * _rchola.dot(_Ghalfa.imag.T)
-    Xb = _rcholb.dot(_Ghalfb.real.T) + 1.j * _rcholb.dot(_Ghalfb.imag.T)
+    _Ghalfa = Ghalfa.reshape(nwalkers, nalpha * nbasis)
+    _Ghalfb = Ghalfb.reshape(nwalkers, nalpha * nbasis)
+    _rchola = rchola.reshape(nchol, nalpha * nbasis)  # built from rchola, not rcholb
+    _rcholb = rcholb.reshape(nchol, nalpha * nbasis)
+    Xa = _rchola.dot(_Ghalfa.real.T) + 1.0j * _rchola.dot(_Ghalfa.imag.T)
+    Xb = _rcholb.dot(_Ghalfb.real.T) + 1.0j * _rcholb.dot(_Ghalfb.imag.T)
     ecoul = cp.einsum("xw,xw->w", Xa, Xa, optimize=True)
     ecoul += cp.einsum("xw,xw->w", Xb, Xb, optimize=True)
-    ecoul += 2. * cp.einsum("xw,xw->w", Xa, Xb, optimize=True)
+    ecoul += 2.0 * cp.einsum("xw,xw->w", Xa, Xb, optimize=True)
 
     _Ghalfa = Ghalfa.reshape(nwalkers, nalpha, nbasis)
     _Ghalfb = Ghalfb.reshape(nwalkers, nalpha, nbasis)
@@ -29,70 +30,71 @@ def current_gpu(rchola, rcholb, Ghalfa, Ghalfb):
     _rcholb = rcholb.reshape(nchol, nalpha, nbasis)
 
     Txij = cp.einsum("xim,wjm->wxji", _rchola, _Ghalfa)
-    exx  = cp.einsum("wxji,wxij->w",Txij,Txij)
+    exx = cp.einsum("wxji,wxij->w", Txij, Txij)
     Txij = cp.einsum("xim,wjm->wxji", _rcholb, _Ghalfb)
-    exx += cp.einsum("wxji,wxij->w",Txij,Txij)
+    exx += cp.einsum("wxji,wxij->w", Txij, Txij)
 
     return ecoul, exx
 
+
 # @cuda.jit('void(float64[:,:,:,:], float64[:])')
 # def exchange_kernel(T, exx_w):
-    # nwalker = T.shape[0]
-    # naux = T.shape[1]
-    # nocc = T.shape[2]
-    # nocc_sq = nocc * nocc
-    # thread_ix = cuda.threadIdx.x
-    # block_ix = cuda.blockIdx.x
-    # walker = block_ix // nocc_sq
-    # a = (block_ix % nocc_sq) // nocc
-    # b = (block_ix % nocc_sq) % nocc
-    # shared_array = cuda.shared.array(shape=(BLOCK_SIZE,), dtype=numba.float64)
-    # block_size = cuda.blockDim.x
-    # shared_array[thread_ix] = 0.0
-    # for x in range(thread_ix, naux, block_size):
-        # shared_array[thread_ix] += T[walker, x, a, b] * T[walker, x, b, a]
-    # cuda.syncthreads()
-    # nreduce = block_size // 2
-    # indx = nreduce
-    # for it in range(0, nreduce):
-        # if indx == 0:
-            # break
-        # if thread_ix < indx:
-            # shared_array[thread_ix] += shared_array[thread_ix + indx]
-        # cuda.syncthreads()
-        # indx = indx // 2
-    # if thread_ix == 0:
-        # cuda.atomic.add(exx_w, walker, shared_array[0])
+# nwalker = T.shape[0]
+# naux = T.shape[1]
+# nocc = T.shape[2]
+# nocc_sq = nocc * nocc
+# thread_ix = cuda.threadIdx.x
+# block_ix = cuda.blockIdx.x
+# walker = block_ix // nocc_sq
+# a = (block_ix % nocc_sq) // nocc
+# b = (block_ix % nocc_sq) % nocc
+# shared_array = cuda.shared.array(shape=(BLOCK_SIZE,), dtype=numba.float64)
+# block_size = cuda.blockDim.x
+# shared_array[thread_ix] = 0.0
+# for x in range(thread_ix, naux, block_size):
+# shared_array[thread_ix] += T[walker, x, a, b] * T[walker, x, b, a]
+# cuda.syncthreads()
+# nreduce = block_size // 2
+# indx = nreduce
+# for it in range(0, nreduce):
+# if indx == 0:
+# break
+# if thread_ix < indx:
+# shared_array[thread_ix] += shared_array[thread_ix + indx]
+# cuda.syncthreads()
+# indx = indx // 2
+# if thread_ix == 0:
+# cuda.atomic.add(exx_w, walker, shared_array[0])
 
 # @cuda.jit('void(complex128[:,:,:,:], complex128[:])')
 # def exchange_kernel(T, exx_w):
-    # nwalker = T.shape[0]
-    # naux = T.shape[1]
-    # nocc = T.shape[2]
-    # nocc_sq = nocc * nocc
-    # thread_ix = cuda.threadIdx.x
-    # block_ix = cuda.blockIdx.x
-    # walker = block_ix // nocc_sq
-    # a = (block_ix % nocc_sq) // nocc
-    # b = (block_ix % nocc_sq) % nocc
-    # shared_array = cuda.shared.array(shape=(BLOCK_SIZE,), dtype=numba.complex128)
-    # block_size = cuda.blockDim.x
-    # shared_array[thread_ix] = 0.0
-    # for x in range(thread_ix, naux, block_size):
-        # shared_array[thread_ix] += T[walker, x, a, b] * T[walker, x, b, a]
-    # cuda.syncthreads()
-    # nreduce = block_size // 2
-    # indx = nreduce
-    # for it in range(0, nreduce):
-        # if indx == 0:
-            # break
-        # if thread_ix < indx:
-            # shared_array[thread_ix] += shared_array[thread_ix + indx]
-        # cuda.syncthreads()
-        # indx = indx // 2
-    # if thread_ix == 0:
-        # cuda.atomic.add(exx_w.real, walker, shared_array[0].real)
-        # cuda.atomic.add(exx_w.imag, walker, shared_array[0].imag)
+# nwalker = T.shape[0]
+# naux = T.shape[1]
+# nocc = T.shape[2]
+# nocc_sq = nocc * nocc
+# thread_ix = cuda.threadIdx.x
+# block_ix = cuda.blockIdx.x
+# walker = block_ix // nocc_sq
+# a = (block_ix % nocc_sq) // nocc
+# b = (block_ix % nocc_sq) % nocc
+# shared_array = cuda.shared.array(shape=(BLOCK_SIZE,), dtype=numba.complex128)
+# block_size = cuda.blockDim.x
+# shared_array[thread_ix] = 0.0
+# for x in range(thread_ix, naux, block_size):
+# shared_array[thread_ix] += T[walker, x, a, b] * T[walker, x, b, a]
+# cuda.syncthreads()
+# nreduce = block_size // 2
+# indx = nreduce
+# for it in range(0, nreduce):
+# if indx == 0:
+# break
+# if thread_ix < indx:
+# shared_array[thread_ix] += shared_array[thread_ix + indx]
+# cuda.syncthreads()
+# indx = indx // 2
+# if thread_ix == 0:
+# cuda.atomic.add(exx_w.real, walker, shared_array[0].real)
+# cuda.atomic.add(exx_w.imag, walker, shared_array[0].imag)
 
 
 def new_gpu(rchola, rcholb, Ghalfa, Ghalfb):
@@ -100,15 +102,15 @@ def new_gpu(rchola, rcholb, Ghalfa, Ghalfb):
     nalpha = Ghalfa.shape[1]
     nbasis = Ghalfa.shape[2]
     nchol = rchola.shape[0]
-    _Ghalfa = Ghalfa.reshape(nwalkers, nalpha*nbasis)
-    _Ghalfb = Ghalfb.reshape(nwalkers, nalpha*nbasis)
-    _rchola = rcholb.reshape(nchol, nalpha*nbasis)
-    _rcholb = rcholb.reshape(nchol, nalpha*nbasis)
-    Xa = _rchola.dot(_Ghalfa.real.T) + 1.j * _rchola.dot(_Ghalfa.imag.T)
-    Xb = _rcholb.dot(_Ghalfb.real.T) + 1.j * _rcholb.dot(_Ghalfb.imag.T)
+    _Ghalfa = Ghalfa.reshape(nwalkers, nalpha * nbasis)
+    _Ghalfb = Ghalfb.reshape(nwalkers, nalpha * nbasis)
+    _rchola = rchola.reshape(nchol, nalpha * nbasis)  # built from rchola, not rcholb
+    _rcholb = rcholb.reshape(nchol, nalpha * nbasis)
+    Xa = _rchola.dot(_Ghalfa.real.T) + 1.0j * _rchola.dot(_Ghalfa.imag.T)
+    Xb = _rcholb.dot(_Ghalfb.real.T) + 1.0j * _rcholb.dot(_Ghalfb.imag.T)
     ecoul = cp.einsum("xw,xw->w", Xa, Xa, optimize=True)
     ecoul += cp.einsum("xw,xw->w", Xb, Xb, optimize=True)
-    ecoul += 2. * cp.einsum("xw,xw->w", Xa, Xb, optimize=True)
+    ecoul += 2.0 * cp.einsum("xw,xw->w", Xa, Xb, optimize=True)
 
     _Ghalfa = Ghalfa.reshape(nwalkers, nalpha, nbasis)
     _Ghalfb = Ghalfb.reshape(nwalkers, nalpha, nbasis)
@@ -117,26 +119,27 @@ def new_gpu(rchola, rcholb, Ghalfa, Ghalfb):
     _rcholb = rcholb.reshape(nchol, nalpha, nbasis)
 
     Txij = cp.einsum("xim,wjm->wxji", _rchola, _Ghalfa)
-    exx  = cp.einsum("wxji,wxij->w", Txij, Txij)
+    exx = cp.einsum("wxji,wxij->w", Txij, Txij)
     Txij = cp.einsum("xim,wjm->wxji", _rcholb, _Ghalfb)
     exx += cp.einsum("wxji,wxij->w", Txij, Txij)
 
     return ecoul, exxcache
 
+
 def current_cpu(rchola, rcholb, Ghalfa, Ghalfb):
     nwalkers = Ghalfa.shape[0]
     nalpha = Ghalfa.shape[1]
     nbasis = Ghalfa.shape[2]
     nchol = rchola.shape[0]
-    _Ghalfa = Ghalfa.reshape(nwalkers, nalpha*nbasis)
-    _Ghalfb = Ghalfb.reshape(nwalkers, nalpha*nbasis)
-    _rchola = rcholb.reshape(nchol, nalpha*nbasis)
-    _rcholb = rcholb.reshape(nchol, nalpha*nbasis)
-    Xa = _rchola.dot(_Ghalfa.real.T) + 1.j * _rchola.dot(_Ghalfa.imag.T)
-    Xb = _rcholb.dot(_Ghalfb.real.T) + 1.j * _rcholb.dot(_Ghalfb.imag.T)
+    _Ghalfa = Ghalfa.reshape(nwalkers, nalpha * nbasis)
+    _Ghalfb = Ghalfb.reshape(nwalkers, nalpha * nbasis)
+    _rchola = rchola.reshape(nchol, nalpha * nbasis)  # built from rchola, not rcholb
+    _rcholb = rcholb.reshape(nchol, nalpha * nbasis)
+    Xa = _rchola.dot(_Ghalfa.real.T) + 1.0j * _rchola.dot(_Ghalfa.imag.T)
+    Xb = _rcholb.dot(_Ghalfb.real.T) + 1.0j * _rcholb.dot(_Ghalfb.imag.T)
     ecoul = np.einsum("xw,xw->w", Xa, Xa, optimize=True)
     ecoul += np.einsum("xw,xw->w", Xb, Xb, optimize=True)
-    ecoul += 2. * np.einsum("xw,xw->w", Xa, Xb, optimize=True)
+    ecoul += 2.0 * np.einsum("xw,xw->w", Xa, Xb, optimize=True)
 
     _Ghalfa = Ghalfa.reshape(nwalkers, nalpha, nbasis)
     _Ghalfb = Ghalfb.reshape(nwalkers, nalpha, nbasis)
@@ -145,19 +148,22 @@ def current_cpu(rchola, rcholb, Ghalfa, Ghalfb):
     _rcholb = rcholb.reshape(nchol, nalpha, nbasis)
 
     Txij = np.einsum("xim,wjm->wxji", _rchola, _Ghalfa)
-    exx  = np.einsum("wxji,wxij->w",Txij,Txij)
+    exx = np.einsum("wxji,wxij->w", Txij, Txij)
     Txij = np.einsum("xim,wjm->wxji", _rcholb, _Ghalfb)
-    exx += np.einsum("wxji,wxij->w",Txij,Txij)
+    exx += np.einsum("wxji,wxij->w", Txij, Txij)
 
     return ecoul, exx
 
+
 def exchange_kernel_cpu(T, out):
     out[:] = np.einsum("wxij,wxji->w", T, T, optimize=True)
 
+
 def exchange_kernel_gpu(T, out):
     out[:] = cp.einsum("wxij,wxji->w", T, T, optimize=True)
     cp.cuda.stream.get_current_stream().synchronize()
 
+
 def exchange_kernel_gpu_numba(T, out):
     nwalkers = T.shape[0]
     nocc = T.shape[2]
@@ -165,6 +171,7 @@ def exchange_kernel_gpu_numba(T, out):
     exchange_kernel[blocks_per_grid, BLOCK_SIZE](T, out)
     cp.cuda.stream.get_current_stream().synchronize()
 
+
 nchol = 200
 nbasis = 450
 nocc = 100
@@ -173,16 +180,14 @@ def exchange_kernel_gpu_numba(T, out):
 np.random.seed(7)
 rchola = np.random.random((nchol, nocc, nbasis))
 rcholb = np.random.random((nchol, nocc, nbasis))
-print(rchola.nbytes/1024**3)
-print(rcholb.nbytes/1024**3)
-ghalfa = (
-        np.random.random((nwalkers, nocc, nbasis)) +
-        1j * np.random.random((nwalkers, nocc, nbasis))
-        )
-ghalfb = (
-        np.random.random((nwalkers, nocc, nbasis)) +
-        1j * np.random.random((nwalkers, nocc, nbasis))
-        )
+print(rchola.nbytes / 1024**3)
+print(rcholb.nbytes / 1024**3)
+ghalfa = np.random.random((nwalkers, nocc, nbasis)) + 1j * np.random.random(
+    (nwalkers, nocc, nbasis)
+)
+ghalfb = np.random.random((nwalkers, nocc, nbasis)) + 1j * np.random.random(
+    (nwalkers, nocc, nbasis)
+)
 start = time.time()
 rchola_cp = cp.asarray(rchola)
 rcholb_cp = cp.asarray(rcholb)
@@ -193,10 +198,10 @@ def exchange_kernel_gpu_numba(T, out):
 for n in range(7, 8):
     nocc = 40
     nchol = nocc * 5 * n
-    _X = np.random.normal(size=(nwalkers*nchol*nocc*nocc))
+    _X = np.random.normal(size=(nwalkers * nchol * nocc * nocc))
     _X = _X.reshape(nwalkers, nchol, nocc, nocc)
-    T = _X + 1j*_X
-    T_cp =  cp.asarray(T)
+    T = _X + 1j * _X
+    T_cp = cp.asarray(T)
     out = np.zeros((nwalkers), dtype=np.complex128)
     exchange_kernel_cpu(T, out)
     out_cp_cupy = cp.zeros((nwalkers), dtype=np.complex128)
@@ -205,22 +210,37 @@ def exchange_kernel_gpu_numba(T, out):
     exchange_reduction(T_cp, out_cp)
     start = time.time()
     exchange_kernel_cpu(T, out)
-    cpu_time = time.time()-start
+    cpu_time = time.time() - start
     out_cp_cupy = cp.zeros((nwalkers), dtype=np.complex128)
     start = time.time()
     free_bytes, total_bytes = cp.cuda.Device().mem_info
     used_bytes = total_bytes - free_bytes
-    print("# {:4.3f} GB out of {:4.3f} GB memory on GPU".format(used_bytes/1024**3,total_bytes/1024**3))
+    print(
+        "# {:4.3f} GB out of {:4.3f} GB memory on GPU".format(
+            used_bytes / 1024**3, total_bytes / 1024**3
+        )
+    )
     exchange_kernel_gpu(T_cp, out_cp_cupy)
     print("here")
-    cupy_time = time.time()-start
+    cupy_time = time.time() - start
     out_cp = cp.zeros((nwalkers), dtype=np.complex128)
     free_bytes, total_bytes = cp.cuda.Device().mem_info
     used_bytes = total_bytes - free_bytes
-    print("# {:4.3f} GB out of {:4.3f} GB memory on GPU".format(used_bytes/1024**3,total_bytes/1024**3))
+    print(
+        "# {:4.3f} GB out of {:4.3f} GB memory on GPU".format(
+            used_bytes / 1024**3, total_bytes / 1024**3
+        )
+    )
     start = time.time()
     exchange_reduction(T_cp, out_cp)
     print("here")
-    numba_time = time.time()-start
-    print(n, nchol, nocc, cpu_time, cupy_time, numba_time,
-          np.max(np.abs((out_cp.get()-out_cp_cupy.get()))))
+    numba_time = time.time() - start
+    print(
+        n,
+        nchol,
+        nocc,
+        cpu_time,
+        cupy_time,
+        numba_time,
+        np.max(np.abs((out_cp.get() - out_cp_cupy.get()))),
+    )
diff --git a/timing_scripts/mpi_GPU.py b/timing_scripts/mpi_GPU.py
index 1d026b94..dc85b3b9 100644
--- a/timing_scripts/mpi_GPU.py
+++ b/timing_scripts/mpi_GPU.py
@@ -6,12 +6,12 @@
 import numpy as np
 from mpi4py import MPI
 
-os.environ["I_MPI_PMI_LIBRARY"] = '/cm/shared/apps/slurm/20.02.6/lib64/libpmi2.so'
+os.environ["I_MPI_PMI_LIBRARY"] = "/cm/shared/apps/slurm/20.02.6/lib64/libpmi2.so"
 
 divide = 5
 nao = 1000 // divide
-nocc = 200  // divide
-naux = 4000  // divide
+nocc = 200 // divide
+naux = 4000 // divide
 nwalkers = int(sys.argv[1])
 
 comm = MPI.COMM_WORLD
@@ -29,10 +29,10 @@
 
 
 """ MPI GPU """
-rchol = np.random.rand(naux,nocc*nao)
+rchol = np.random.rand(naux, nocc * nao)
 walkers_batch_Ghalf = np.random.rand(2, nwalkers, nao * nocc)
 with cp.cuda.Device(rank % 2):
-    warmup = cp.dot(cp.array(np.random.rand(2,2)),cp.array(np.random.rand(2,2)))
+    warmup = cp.dot(cp.array(np.random.rand(2, 2)), cp.array(np.random.rand(2, 2)))
     rchol_gpu = cp.asarray(rchol)
     walkers_batch_Ghalf_gpu = cp.asarray(walkers_batch_Ghalf)
     recvbuf = cp.empty_like(walkers_batch_Ghalf_gpu)
@@ -46,8 +46,7 @@
     comm.Send(walkers_batch_Ghalf_gpu, dest=1, tag=13)
 elif rank == 1:
     comm.Recv(recvbuf, source=0, tag=13)
-   
+
 with cp.cuda.Device(rank % 2):
     print("send   : ", walkers_batch_Ghalf_gpu.device, walkers_batch_Ghalf_gpu.sum())
     print("receive: ", recvbuf.device, recvbuf.sum())
-
diff --git a/timing_scripts/numba_benchmark.py b/timing_scripts/numba_benchmark.py
index 5ef02115..9ab015ab 100644
--- a/timing_scripts/numba_benchmark.py
+++ b/timing_scripts/numba_benchmark.py
@@ -3,8 +3,8 @@
 import numpy
 
 # numpy.show_config()
-os.environ['MKL_NUM_THREADS'] = '1'
-os.environ['OMP_NUM_THREADS'] = '1'
+os.environ["MKL_NUM_THREADS"] = "1"
+os.environ["OMP_NUM_THREADS"] = "1"
 
 import time
 
@@ -19,61 +19,62 @@ def local_energy_generic_cholesky_opt_rhf(Ghalfa, rchola):
     nbasis = Ghalfa.shape[1]
 
     GhalfaT = Ghalfa.T.copy()
-    
-    Ta = numpy.zeros((nalpha,nalpha), dtype=numpy.complex128)
 
-    exx  = 0.j  # we will iterate over cholesky index to update Ex energy for alpha and beta
+    Ta = numpy.zeros((nalpha, nalpha), dtype=numpy.complex128)
+
+    exx = 0.0j  # we will iterate over cholesky index to update Ex energy for alpha and beta
     for rmi_a in rchola:
-        rmi_a = rmi_a.reshape((nalpha,nbasis))
-        Ta[:,:].real = rmi_a.dot(GhalfaT.real) 
-        Ta[:,:].imag = rmi_a.dot(GhalfaT.imag)  # this is a (nalpha, nalpha)
+        rmi_a = rmi_a.reshape((nalpha, nbasis))
+        Ta[:, :].real = rmi_a.dot(GhalfaT.real)
+        Ta[:, :].imag = rmi_a.dot(GhalfaT.imag)  # this is a (nalpha, nalpha)
         exx += numpy.einsum("ij,ji->", Ta, Ta) * 2.0
 
     e2b = -0.5 * exx
 
     return e2b
 
-@jit(nopython=True,fastmath=True)
+
+@jit(nopython=True, fastmath=True)
 def local_energy_numba(rchola, Ghalfa_batch):
     naux = rchola.shape[0]
     nwalkers = Ghalfa_batch.shape[0]
     nocc = Ghalfa_batch.shape[1]
 
-    T = numpy.zeros((nocc,nocc), dtype=numpy.complex128)
+    T = numpy.zeros((nocc, nocc), dtype=numpy.complex128)
     exx = numpy.zeros((nwalkers), dtype=numpy.complex128)
     for iw in range(nwalkers):
         Greal = Ghalfa_batch[iw].real.copy()
         Gimag = Ghalfa_batch[iw].imag.copy()
         for jx in range(naux):
-            T = rchola[jx].dot(Greal.T) + 1.j * rchola[jx].dot(Gimag.T)
+            T = rchola[jx].dot(Greal.T) + 1.0j * rchola[jx].dot(Gimag.T)
             exx[iw] += numpy.dot(T.ravel(), T.T.ravel())
     return exx
 
-for nmult in [1,2,3,4,5,6]:
+
+for nmult in [1, 2, 3, 4, 5, 6]:
     nao = 108 * nmult
     nocc = 15 * nmult
     nwalkers = 50
     naux = 693 * nmult
-    
-    rchola = numpy.random.randn(naux * nao * nocc).reshape(naux, nocc,nao)
-    T = numpy.zeros((nocc,nocc), dtype=numpy.complex128)
+
+    rchola = numpy.random.randn(naux * nao * nocc).reshape(naux, nocc, nao)
+    T = numpy.zeros((nocc, nocc), dtype=numpy.complex128)
     exx = numpy.zeros((nwalkers), dtype=numpy.complex128)
-    Ghalfa_batch = numpy.random.randn(nwalkers * nao * nocc).reshape(nwalkers, nocc, nao) + 1.j * numpy.random.randn(nwalkers * nao * nocc).reshape(nwalkers, nocc, nao)
-    
+    Ghalfa_batch = numpy.random.randn(nwalkers * nao * nocc).reshape(
+        nwalkers, nocc, nao
+    ) + 1.0j * numpy.random.randn(nwalkers * nao * nocc).reshape(nwalkers, nocc, nao)
+
     start = time.time()
-    
+
     # for iw in range(nwalkers):
     # 	exx[iw] = local_energy_generic_cholesky_opt_rhf(Ghalfa_batch[iw], rchola)
     # print("{}: new algorithm (RHF) = {}".format(nmult, time.time()-start))
-    
+
     if nmult == 1:
-	    start = time.time()
-	    exx = local_energy_numba(rchola, Ghalfa_batch)
-	    # print("Numba (w. compilation) = {}".format(time.time()-start))
-    
+        start = time.time()
+        exx = local_energy_numba(rchola, Ghalfa_batch)
+        # print("Numba (w. compilation) = {}".format(time.time()-start))
+
     start = time.time()
     exx = local_energy_numba(rchola, Ghalfa_batch)
-    print("{}: Numba = {}".format(nmult,time.time()-start))
-
-
-
+    print("{}: Numba = {}".format(nmult, time.time() - start))
diff --git a/timing_scripts/population_control.py b/timing_scripts/population_control.py
index 30aaa83d..082e1588 100644
--- a/timing_scripts/population_control.py
+++ b/timing_scripts/population_control.py
@@ -14,7 +14,7 @@
 mpi4py.rc.recv_mprobe = False
 from mpi4py import MPI
 
-nelec = (5,5)
+nelec = (5, 5)
 nwalkers = 20
 nsteps = 50
 
@@ -23,24 +23,11 @@
         "nup": nelec[0],
         "ndown": nelec[1],
     },
-    "hamiltonian": {
-        "name": "Generic",
-        "integrals": "afqmc.h5"
-    },
-    "qmc": {
-        "dt": 0.01,
-        "nsteps": nsteps,
-        "nwalkers": nwalkers,
-        "blocks": 1,
-        "batched": True
-    },
-    "trial": {
-        "filename": "afqmc.h5"
-    },
-    "walker":{
-    "population_control":"pair_branch"
-    },
-    "estimators": {}
+    "hamiltonian": {"name": "Generic", "integrals": "afqmc.h5"},
+    "qmc": {"dt": 0.01, "nsteps": nsteps, "nwalkers": nwalkers, "blocks": 1, "batched": True},
+    "trial": {"filename": "afqmc.h5"},
+    "walker": {"population_control": "pair_branch"},
+    "estimators": {},
 }
 
 numpy.random.seed(7)
@@ -49,42 +36,33 @@
 verbose = True
 shared_comm = get_shared_comm(comm, verbose=verbose)
 
-qmc_opts = get_input_value(options, 'qmc',
-                           default={},
-                           verbose=verbose)
-ham_opts = get_input_value(options, 'hamiltonian',
-                           default={},
-                           verbose=verbose)
-twf_opts = get_input_value(options, 'trial',
-                           default={},
-                           verbose=verbose)
-prop_opts = get_input_value(options, 'propoagator',
-                           default={},
-                           verbose=verbose)
-wlk_opts = get_input_value(options, 'walkers', default={},
-                           alias=['walker', 'walker_opts'],
-                           verbose=verbose)
-est_opts = get_input_value(options, 'estimators', default={},
-                           alias=['estimates','estimator'],
-                           verbose=verbose)
+qmc_opts = get_input_value(options, "qmc", default={}, verbose=verbose)
+ham_opts = get_input_value(options, "hamiltonian", default={}, verbose=verbose)
+twf_opts = get_input_value(options, "trial", default={}, verbose=verbose)
+prop_opts = get_input_value(options, "propagator", default={}, verbose=verbose)
+wlk_opts = get_input_value(
+    options, "walkers", default={}, alias=["walker", "walker_opts"], verbose=verbose
+)
+est_opts = get_input_value(
+    options, "estimators", default={}, alias=["estimates", "estimator"], verbose=verbose
+)
 
 qmc = QMCOpts(qmc_opts, sys, verbose=True)
 qmc.ntot_walkers = qmc.nwalkers * comm.size
 
-ham = get_hamiltonian (sys, ham_opts, verbose = True, comm=shared_comm)
+ham = get_hamiltonian(sys, ham_opts, verbose=True, comm=shared_comm)
 
-trial = ( get_trial_wavefunction(sys, ham, options=twf_opts,
-                       comm=comm,
-                       scomm=shared_comm,
-                       verbose=verbose) )
-trial.calculate_energy(sys, ham) # this is to get the energy shift
+trial = get_trial_wavefunction(
+    sys, ham, options=twf_opts, comm=comm, scomm=shared_comm, verbose=verbose
+)
+trial.calculate_energy(sys, ham)  # this is to get the energy shift
 
 print(trial.psi.shape)
-prop = get_propagator_driver(sys, ham, trial, qmc, options=prop_opts,verbose=verbose)
+prop = get_propagator_driver(sys, ham, trial, qmc, options=prop_opts, verbose=verbose)
 print(trial.psi.shape)
 
 handler_batch = WalkerBatchHandler(sys, ham, trial, qmc, wlk_opts, verbose=False, comm=comm)
-for i in range (nsteps):
+for i in range(nsteps):
     prop.propagate_walker_batch(handler_batch.walkers_batch, sys, ham, trial, trial.energy)
     handler_batch.walkers_batch.reortho()
     handler_batch.pop_control(comm)
@@ -100,50 +78,29 @@
         "nup": nelec[0],
         "ndown": nelec[1],
     },
-    "hamiltonian": {
-        "name": "Generic",
-        "integrals": "afqmc.h5"
-    },
-    "qmc": {
-        "dt": 0.01,
-        "nsteps": nsteps,
-        "nwalkers": nwalkers,
-        "blocks": 1,
-        "batched": False
-    },
-    "trial": {
-        "filename": "afqmc.h5"
-    },
-    "walker":{
-    "population_control":"pair_branch"
-    },
-    "estimators": {}
+    "hamiltonian": {"name": "Generic", "integrals": "afqmc.h5"},
+    "qmc": {"dt": 0.01, "nsteps": nsteps, "nwalkers": nwalkers, "blocks": 1, "batched": False},
+    "trial": {"filename": "afqmc.h5"},
+    "walker": {"population_control": "pair_branch"},
+    "estimators": {},
 }
 
-qmc_opts = get_input_value(options, 'qmc',
-                           default={},
-                           verbose=verbose)
-ham_opts = get_input_value(options, 'hamiltonian',
-                           default={},
-                           verbose=verbose)
-twf_opts = get_input_value(options, 'trial',
-                           default={},
-                           verbose=verbose)
-prop_opts = get_input_value(options, 'propoagator',
-                           default={},
-                           verbose=verbose)
-qmc = QMCOpts(qmc_opts, sys,verbose=True)
+qmc_opts = get_input_value(options, "qmc", default={}, verbose=verbose)
+ham_opts = get_input_value(options, "hamiltonian", default={}, verbose=verbose)
+twf_opts = get_input_value(options, "trial", default={}, verbose=verbose)
+prop_opts = get_input_value(options, "propagator", default={}, verbose=verbose)
+qmc = QMCOpts(qmc_opts, sys, verbose=True)
 qmc.ntot_walkers = qmc.nwalkers * comm.size
-prop = get_propagator_driver(sys, ham, trial, qmc, options=prop_opts,verbose=verbose)
+prop = get_propagator_driver(sys, ham, trial, qmc, options=prop_opts, verbose=verbose)
 
 handler = Walkers(sys, ham, trial, qmc, wlk_opts, verbose=False, comm=comm)
 
-for i in range (nsteps):
+for i in range(nsteps):
     for walker in handler.walkers:
         prop.propagate_walker(walker, sys, ham, trial, trial.energy)
-        detR = walker.reortho(trial) # reorthogonalizing to stablize
+        detR = walker.reortho(trial)  # reorthogonalizing to stabilize
     handler.pop_control(comm)
 
 for iw in range(nwalkers):
     assert numpy.allclose(handler_batch.walkers_batch.phi[iw], handler.walkers[iw].phi)
-    assert numpy.allclose(handler_batch.walkers_batch.weight[iw], handler.walkers[iw].weight)
\ No newline at end of file
+    assert numpy.allclose(handler_batch.walkers_batch.weight[iw], handler.walkers[iw].weight)
diff --git a/timing_scripts/propagation.py b/timing_scripts/propagation.py
index f2f04ef7..82fbaac6 100644
--- a/timing_scripts/propagation.py
+++ b/timing_scripts/propagation.py
@@ -4,43 +4,46 @@
 import numpy as np
 
 # numpy.show_config()
-os.environ['MKL_NUM_THREADS'] = '1'
-os.environ['OMP_NUM_THREADS'] = '1'
+os.environ["MKL_NUM_THREADS"] = "1"
+os.environ["OMP_NUM_THREADS"] = "1"
 
 from numba import jit
 
 
-@jit(nopython=True,fastmath=True)
+@jit(nopython=True, fastmath=True)
 def propagation_numba(VHS, walkers_batch_phi):
     nwalkers = walkers_batch_phi.shape[0]
     for iw in range(nwalkers):
-        for i in range (6):
-           walkers_batch_phi[iw] = VHS[iw].dot(walkers_batch_phi[iw])
-    return 
+        for i in range(6):
+            walkers_batch_phi[iw] = VHS[iw].dot(walkers_batch_phi[iw])
+    return
+
 
 divide = 1
 
 nao = 1000 // divide
-nocc = 200  // divide
-naux = 4000  // divide
+nocc = 200 // divide
+naux = 4000 // divide
 nwalkers = 20
 
-chol = np.random.rand(naux,nao*nao)
+chol = np.random.rand(naux, nao * nao)
 x = np.random.rand(nwalkers, naux)
 
 t0 = time.time()
-VHS = x.dot(chol) + 1.j * x.dot(chol)
+VHS = x.dot(chol) + 1.0j * x.dot(chol)
 VHS = VHS.reshape(nwalkers, nao, nao)
 t1 = time.time()
 
-walkers_batch_phi0 = np.random.rand(nwalkers, nao, nocc) + 1.j * np.random.rand(nwalkers, nao, nocc)
+walkers_batch_phi0 = np.random.rand(nwalkers, nao, nocc) + 1.0j * np.random.rand(
+    nwalkers, nao, nocc
+)
 
 # version 1
 walkers_batch_phi = walkers_batch_phi0.copy()
 t0 = time.time()
 for iw in range(nwalkers):
-    for i in range (6):
-       walkers_batch_phi[iw] = VHS[iw].dot(walkers_batch_phi[iw])
+    for i in range(6):
+        walkers_batch_phi[iw] = VHS[iw].dot(walkers_batch_phi[iw])
 t1 = time.time()
 print("naive propagation = {}".format((t1 - t0)))
 
@@ -55,14 +58,14 @@ def propagation_numba(VHS, walkers_batch_phi):
 # version 2
 walkers_batch_phi2 = walkers_batch_phi0.copy()
 t0 = time.time()
-for i in range (6):
+for i in range(6):
     walkers_batch_phi2 = np.einsum("wmn,wni->wmi", VHS, walkers_batch_phi2, optimize=True)
 t1 = time.time()
-assert np.allclose(walkers_batch_phi[0],walkers_batch_phi2[0])
+assert np.allclose(walkers_batch_phi[0], walkers_batch_phi2[0])
 print("propagation 2 = {}".format((t1 - t0)))
 
 # # version 3
-walkers_batch_phi3 = np.hstack((w for w in walkers_batch_phi0)).reshape(nao, nwalkers*nocc)
+walkers_batch_phi3 = np.hstack(list(walkers_batch_phi0)).reshape(nao, nwalkers * nocc)
 # print (walkers_batch_phi3.shape, walkers_batch_phi0.shape)
 # VHS.dot(walkers_batch_phi3)
 # # # version 1
@@ -74,8 +77,3 @@ def propagation_numba(VHS, walkers_batch_phi):
 # #        walkers_batch_phi[iw][1] = VHS[iw].dot(walkers_batch_phi[iw][1])
 # # t1 = time.time()
 # # print("propagation 3 = {}".format((t1 - t0)))
-
-
-
-
-
diff --git a/timing_scripts/propagation_GPU.py b/timing_scripts/propagation_GPU.py
index 02d40d65..79575b56 100644
--- a/timing_scripts/propagation_GPU.py
+++ b/timing_scripts/propagation_GPU.py
@@ -6,9 +6,9 @@
 import numpy as np
 from mpi4py import MPI
 
-#os.environ["I_MPI_PMI_LIBRARY"] = '/cm/shared/apps/slurm/20.02.6/lib64/libpmi2.so'
-os.environ["OMP_NUM_THREADS"] = '1'
-os.environ["MKL_NUM_THREADS"] = '1'
+# os.environ["I_MPI_PMI_LIBRARY"] = '/cm/shared/apps/slurm/20.02.6/lib64/libpmi2.so'
+os.environ["OMP_NUM_THREADS"] = "1"
+os.environ["MKL_NUM_THREADS"] = "1"
 
 divide = 1
 nao = 439 // divide
@@ -23,46 +23,52 @@
 size = comm.Get_size()
 rank = comm.Get_rank()
 
-chol = np.random.rand(naux,nao*nao)
+chol = np.random.rand(naux, nao * nao)
 x = np.random.rand(nwalkers, naux)
 VHS = x.dot(chol)
 VHS = VHS.reshape(nwalkers, nao, nao)
-walkers_batch_phi0 = np.random.rand(nwalkers, nao, nocca+noccb) + 1.j * np.random.rand(nwalkers, nao, nocca+noccb)
+walkers_batch_phi0 = np.random.rand(nwalkers, nao, nocca + noccb) + 1.0j * np.random.rand(
+    nwalkers, nao, nocca + noccb
+)
 
-#""" MPI CPU """
-#walkers_batch_phi = walkers_batch_phi0.copy()
-#t0 = time.time()
-#for t in range(nsteps):
+# """ MPI CPU """
+# walkers_batch_phi = walkers_batch_phi0.copy()
+# t0 = time.time()
+# for t in range(nsteps):
 #    for iw in range(nwalkers):
 #        for i in range (6):
 #               walkers_batch_phi[iw] = VHS[iw].dot(walkers_batch_phi[iw])
-#t1 = time.time()
-#print("MPI Rank {} CPU Time: {}".format(rank, (t1 - t0)))
+# t1 = time.time()
+# print("MPI Rank {} CPU Time: {}".format(rank, (t1 - t0)))
 
 """ MPI GPU using for loop"""
 with cp.cuda.Device(rank):
-    mat = cp.array(np.random.rand(2,2))
-    warmup = cp.dot(cp.array(np.random.rand(2,2)),cp.array(np.random.rand(2,2)))
+    mat = cp.array(np.random.rand(2, 2))
+    warmup = cp.dot(cp.array(np.random.rand(2, 2)), cp.array(np.random.rand(2, 2)))
     walkers_batch_phi = cp.asarray(walkers_batch_phi0.copy(), dtype=cp.complex64)
     t0 = time.time()
-    VHS = cp.asarray(VHS, dtype = cp.float32)
+    VHS = cp.asarray(VHS, dtype=cp.float32)
     for t in range(nsteps):
         for iw in range(nwalkers):
-            for i in range (6):
-                   walkers_batch_phi[iw] = VHS[iw].dot(walkers_batch_phi[iw].real) + 1.j * VHS[iw].dot(walkers_batch_phi[iw].imag)
+            for i in range(6):
+                walkers_batch_phi[iw] = VHS[iw].dot(walkers_batch_phi[iw].real) + 1.0j * VHS[
+                    iw
+                ].dot(walkers_batch_phi[iw].imag)
     t1 = time.time()
 print("MPI Rank {} GPU for loop Time: {} on GPU {}".format(rank, (t1 - t0), VHS.device))
 
 
 """ MPI GPU using einsum"""
 with cp.cuda.Device(rank):
-    mat = cp.array(np.random.rand(2,2))
+    mat = cp.array(np.random.rand(2, 2))
     warmup = cp.einsum("ab,bc->ac", mat, mat, optimize=True)
     walkers_batch_phi = cp.asarray(walkers_batch_phi0.copy(), dtype=cp.complex64)
     t0 = time.time()
     VHS = cp.asarray(VHS, dtype=cp.float32).reshape(nwalkers, nao, nao)
     for t in range(nsteps):
-        for i in range (6):
-            walkers_batch_phi = cp.einsum("wmn,wni->wmi", VHS, walkers_batch_phi.real, optimize=True) + 1.j * cp.einsum("wmn,wni->wmi", VHS, walkers_batch_phi.imag, optimize=True)
+        for i in range(6):
+            walkers_batch_phi = cp.einsum(
+                "wmn,wni->wmi", VHS, walkers_batch_phi.real, optimize=True
+            ) + 1.0j * cp.einsum("wmn,wni->wmi", VHS, walkers_batch_phi.imag, optimize=True)
     t1 = time.time()
 print("MPI Rank {} GPU einsum Time: {} on GPU {}".format(rank, (t1 - t0), VHS.device))
diff --git a/timing_scripts/propagation_VHS.py b/timing_scripts/propagation_VHS.py
index ad1931b0..80fa3a5e 100644
--- a/timing_scripts/propagation_VHS.py
+++ b/timing_scripts/propagation_VHS.py
@@ -8,13 +8,13 @@
 divide = 5
 
 nao = 1000 // divide
-naux = 4000  // divide
+naux = 4000 // divide
 nwalkers = 20
 
 chol = numpy.random.rand(naux, nao, nao)
-chol = chol + chol.transpose(0,2,1)
-chol3 = chol.reshape(naux,nao*nao).copy()
-chol2 = chol.T.reshape(nao,nao,naux).copy() #numpy.random.rand(nao, nao, naux)
+chol = chol + chol.transpose(0, 2, 1)
+chol3 = chol.reshape(naux, nao * nao).copy()
+chol2 = chol.T.reshape(nao, nao, naux).copy()  # numpy.random.rand(nao, nao, naux)
 x = numpy.random.rand(nwalkers, naux)
 
 """1"""
diff --git a/timing_scripts/propagation_VHS_GPU.py b/timing_scripts/propagation_VHS_GPU.py
index a10956eb..32f5e9cb 100644
--- a/timing_scripts/propagation_VHS_GPU.py
+++ b/timing_scripts/propagation_VHS_GPU.py
@@ -6,9 +6,9 @@
 import numpy as np
 from mpi4py import MPI
 
-#os.environ["I_MPI_PMI_LIBRARY"] = '/cm/shared/apps/slurm/20.02.6/lib64/libpmi2.so'
-os.environ["OMP_NUM_THREADS"] = '1'
-os.environ["MKL_NUM_THREADS"] = '1'
+# os.environ["I_MPI_PMI_LIBRARY"] = '/cm/shared/apps/slurm/20.02.6/lib64/libpmi2.so'
+os.environ["OMP_NUM_THREADS"] = "1"
+os.environ["MKL_NUM_THREADS"] = "1"
 
 divide = 1
 nao = 439 // divide
@@ -23,24 +23,24 @@
 rank = comm.Get_rank()
 
 """ MPI CPU """
-chol = np.random.rand(naux, nao*nao)
+chol = np.random.rand(naux, nao * nao)
 x = np.random.rand(nwalkers, naux)
-#t0 = time.time()
-#for i in range(nsteps):
+# t0 = time.time()
+# for i in range(nsteps):
 #    VHS = np.dot(x, chol)
-#t1 = time.time()
-#print("MPI Rank {} CPU Time: {}".format(rank, t1 - t0))
+# t1 = time.time()
+# print("MPI Rank {} CPU Time: {}".format(rank, t1 - t0))
 
 """ MPI GPU """
-chol = np.random.rand(naux, nao*nao)
+chol = np.random.rand(naux, nao * nao)
 x = np.random.rand(nwalkers, naux)
 with cp.cuda.Device(rank):
-    warmup = cp.dot(cp.array(np.random.rand(2,2)),cp.array(np.random.rand(2,2)))
-    x = cp.array(x, dtype = cp.float64)
-    chol = cp.array(chol, dtype = cp.float64)
+    warmup = cp.dot(cp.array(np.random.rand(2, 2)), cp.array(np.random.rand(2, 2)))
+    x = cp.array(x, dtype=cp.float64)
+    chol = cp.array(chol, dtype=cp.float64)
     t0 = time.time()
     for i in range(nsteps):
         VHS = cp.dot(x, chol)
     t1 = time.time()
     print(VHS.dtype)
-print("MPI Rank {} - GPU Time: {} on GPU {}".format(rank, t1 - t0, VHS.device))
\ No newline at end of file
+print("MPI Rank {} - GPU Time: {} on GPU {}".format(rank, t1 - t0, VHS.device))
diff --git a/timing_scripts/propagation_force_bias.py b/timing_scripts/propagation_force_bias.py
index b7ce440d..a64a1b62 100644
--- a/timing_scripts/propagation_force_bias.py
+++ b/timing_scripts/propagation_force_bias.py
@@ -1,8 +1,8 @@
 import os
 
 # numpy.show_config()
-os.environ['MKL_NUM_THREADS'] = '1'
-os.environ['OMP_NUM_THREADS'] = '1'
+os.environ["MKL_NUM_THREADS"] = "1"
+os.environ["OMP_NUM_THREADS"] = "1"
 
 import time
 
@@ -10,7 +10,7 @@
 from numba import jit
 
 
-@jit(nopython=True,fastmath=True)
+@jit(nopython=True, fastmath=True)
 def force_bias_numba(rchol, walkers_batch_Ghalf):
     walkers_batch_Ghalfa_real_T = walkers_batch_Ghalf[0].real.T.copy()
     walkers_batch_Ghalfa_imag_T = walkers_batch_Ghalf[0].imag.T.copy()
@@ -19,30 +19,36 @@ def force_bias_numba(rchol, walkers_batch_Ghalf):
     vfb2_real = rchol.dot(walkers_batch_Ghalfa_real_T) + rchol.dot(walkers_batch_Ghalfb_real_T)
     vfb2_imag = rchol.dot(walkers_batch_Ghalfa_imag_T) + rchol.dot(walkers_batch_Ghalfb_imag_T)
     vfb2 = np.zeros_like(vfb2_real, dtype=np.complex128)
-    vfb2 = vfb2_real + 1.j * vfb2_imag
+    vfb2 = vfb2_real + 1.0j * vfb2_imag
     vfb2 = vfb2.T.copy()
     return vfb2
 
+
 divide = 2
 
 nao = 1000 // divide
-nocc = 200  // divide
-naux = 4000  // divide
+nocc = 200 // divide
+naux = 4000 // divide
 nwalkers = 20
 
-rchol = np.random.rand(naux,nocc*nao)
-walkers_batch_Ghalf = np.random.rand(2, nwalkers, nao, nocc) + 1.j * np.random.rand(2, nwalkers, nao, nocc)
+rchol = np.random.rand(naux, nocc * nao)
+walkers_batch_Ghalf = np.random.rand(2, nwalkers, nao, nocc) + 1.0j * np.random.rand(
+    2, nwalkers, nao, nocc
+)
 
 # algorithm 1
 t0 = time.time()
 vfb = []
 for iw in range(nwalkers):
-    vfb += [np.dot(rchol, walkers_batch_Ghalf[0][iw].ravel()) + np.dot(rchol, walkers_batch_Ghalf[1][iw].ravel())]
+    vfb += [
+        np.dot(rchol, walkers_batch_Ghalf[0][iw].ravel())
+        + np.dot(rchol, walkers_batch_Ghalf[1][iw].ravel())
+    ]
 vfb = np.array(vfb)
 t1 = time.time()
 print("forming vfb naive = {}".format(t1 - t0))
 
-walkers_batch_Ghalf = walkers_batch_Ghalf.reshape(2, nwalkers, nao*nocc)
+walkers_batch_Ghalf = walkers_batch_Ghalf.reshape(2, nwalkers, nao * nocc)
 t0 = time.time()
 vfb2 = rchol.dot(walkers_batch_Ghalf[0].T) + rchol.dot(walkers_batch_Ghalf[1].T)
 vfb2 = vfb2.T.copy()
@@ -51,7 +57,7 @@ def force_bias_numba(rchol, walkers_batch_Ghalf):
 
 assert np.allclose(vfb2, vfb)
 
-walkers_batch_Ghalf = walkers_batch_Ghalf.reshape(2, nwalkers, nao*nocc)
+walkers_batch_Ghalf = walkers_batch_Ghalf.reshape(2, nwalkers, nao * nocc)
 t0 = time.time()
 vfb2_real = rchol.dot(walkers_batch_Ghalf[0].real.T) + rchol.dot(walkers_batch_Ghalf[1].real.T)
 vfb2_imag = rchol.dot(walkers_batch_Ghalf[0].imag.T) + rchol.dot(walkers_batch_Ghalf[1].imag.T)
@@ -65,7 +71,7 @@ def force_bias_numba(rchol, walkers_batch_Ghalf):
 assert np.allclose(vfb2, vfb)
 
 
-walkers_batch_Ghalf = walkers_batch_Ghalf.reshape(2, nwalkers, nao*nocc)
+walkers_batch_Ghalf = walkers_batch_Ghalf.reshape(2, nwalkers, nao * nocc)
 
 t0 = time.time()
 walkers_batch_Ghalfa_real_T = walkers_batch_Ghalf[0].real.T.copy()
diff --git a/timing_scripts/propagation_force_bias_GPU.py b/timing_scripts/propagation_force_bias_GPU.py
index f422e34e..fa3cf778 100644
--- a/timing_scripts/propagation_force_bias_GPU.py
+++ b/timing_scripts/propagation_force_bias_GPU.py
@@ -6,9 +6,9 @@
 import numpy as np
 from mpi4py import MPI
 
-#os.environ["I_MPI_PMI_LIBRARY"] = '/cm/shared/apps/slurm/20.02.6/lib64/libpmi2.so'
-os.environ["OMP_NUM_THREADS"] = '1'
-os.environ["MKL_NUM_THREADS"] = '1'
+# os.environ["I_MPI_PMI_LIBRARY"] = '/cm/shared/apps/slurm/20.02.6/lib64/libpmi2.so'
+os.environ["OMP_NUM_THREADS"] = "1"
+os.environ["MKL_NUM_THREADS"] = "1"
 
 divide = 1
 nao = 439 // divide
@@ -24,29 +24,37 @@
 
 
 """ MPI CPU """
-rchol_a = np.random.rand(naux,nocca*nao)
-rchol_b = np.random.rand(naux,noccb*nao)
-walkers_batch_Ghalfa = np.random.rand(nwalkers, nao * nocca) + 1.j * np.random.rand(nwalkers, nao * nocca)
-walkers_batch_Ghalfb = np.random.rand(nwalkers, nao * noccb) + 1.j * np.random.rand(nwalkers, nao * noccb)
-#t0 = time.time()
-#for i in range(nsteps):
+rchol_a = np.random.rand(naux, nocca * nao)
+rchol_b = np.random.rand(naux, noccb * nao)
+walkers_batch_Ghalfa = np.random.rand(nwalkers, nao * nocca) + 1.0j * np.random.rand(
+    nwalkers, nao * nocca
+)
+walkers_batch_Ghalfb = np.random.rand(nwalkers, nao * noccb) + 1.0j * np.random.rand(
+    nwalkers, nao * noccb
+)
+# t0 = time.time()
+# for i in range(nsteps):
 #    vfb = rchol_a.dot(walkers_batch_Ghalfa.T) + rchol_b.dot(walkers_batch_Ghalfb.T)
-#t1 = time.time()
-#print("MPI Rank {} CPU Time: {}".format(rank, t1 - t0))
+# t1 = time.time()
+# print("MPI Rank {} CPU Time: {}".format(rank, t1 - t0))
 
 """ MPI GPU """
 with cp.cuda.Device(rank):
-    warmup = cp.dot(cp.array(np.random.rand(2,2)),cp.array(np.random.rand(2,2)))
-    rchol_a = cp.asarray(rchol_a, dtype = cp.float64)
-    rchol_b = cp.asarray(rchol_b, dtype = cp.float64)
+    warmup = cp.dot(cp.array(np.random.rand(2, 2)), cp.array(np.random.rand(2, 2)))
+    rchol_a = cp.asarray(rchol_a, dtype=cp.float64)
+    rchol_b = cp.asarray(rchol_b, dtype=cp.float64)
     walkers_batch_Ghalfa = cp.asarray(walkers_batch_Ghalfa)
     walkers_batch_Ghalfb = cp.asarray(walkers_batch_Ghalfb)
     vfb = cp.zeros((nwalkers, naux), dtype=cp.complex64)
-    tmp = cp.zeros((naux, nwalkers), dtype = cp.complex64)
+    tmp = cp.zeros((naux, nwalkers), dtype=cp.complex64)
     t0 = time.time()
     for i in range(nsteps):
-        tmp[:,:].real = rchol_a.dot(walkers_batch_Ghalfa.real.T) + rchol_b.dot(walkers_batch_Ghalfb.real.T)
-        tmp[:,:].imag = rchol_a.dot(walkers_batch_Ghalfa.imag.T) + rchol_b.dot(walkers_batch_Ghalfb.imag.T)
+        tmp[:, :].real = rchol_a.dot(walkers_batch_Ghalfa.real.T) + rchol_b.dot(
+            walkers_batch_Ghalfb.real.T
+        )
+        tmp[:, :].imag = rchol_a.dot(walkers_batch_Ghalfa.imag.T) + rchol_b.dot(
+            walkers_batch_Ghalfb.imag.T
+        )
         vfb = tmp.T.copy()
     t1 = time.time()
-print("MPI Rank {} - CPU/GPU Time: {} on GPU {}".format(rank, t1 - t0, vfb.device))
\ No newline at end of file
+print("MPI Rank {} - CPU/GPU Time: {} on GPU {}".format(rank, t1 - t0, vfb.device))
diff --git a/timing_scripts/propagation_greens_function.py b/timing_scripts/propagation_greens_function.py
index a7f762fa..4adfc5d0 100644
--- a/timing_scripts/propagation_greens_function.py
+++ b/timing_scripts/propagation_greens_function.py
@@ -1,14 +1,18 @@
 try:
     import cupy as np
+
     _gpu = True
 except ImportError:
     import numpy as np
+
     _gpu = False
 import time
 
 from ipie.hamiltonians.generic import Generic as HamGeneric
 from ipie.legacy.estimators.greens_function import (
-    greens_function_single_det, greens_function_single_det_batch)
+    greens_function_single_det,
+    greens_function_single_det_batch,
+)
 from ipie.legacy.walkers.single_det import SingleDetWalker
 from ipie.legacy.walkers.single_det_batch import SingleDetWalkerBatch
 from ipie.systems.generic import Generic
@@ -18,10 +22,11 @@
 divide = 5
 
 nao = 1000 // divide
-nocc = 200  // divide
-naux = 4000  // divide
+nocc = 200 // divide
+naux = 4000 // divide
 nwalkers = 20
 
+
 def time_overlap():
     def loop_based(a, b):
         for iw in range(a.shape[0]):
@@ -32,7 +37,7 @@ def loop_based(a, b):
             s, o = np.linalg.slogdet(inv)
 
     def einsum_based(a, b):
-        ovlps = np.einsum('wmi,mj->wij', a, b.conj(), optimize=True)
+        ovlps = np.einsum("wmi,mj->wij", a, b.conj(), optimize=True)
         invs = np.linalg.inv(ovlps)
         s, o = np.linalg.slogdet(invs)
 
@@ -46,7 +51,8 @@ def einsum_based(a, b):
         start = time.time()
         einsum_based(psi, trial)
         t_einsum = time.time() - start
-        print(nwalkers, t_einsum/t_loop)
+        print(nwalkers, t_einsum / t_loop)
+
 
 def time_dets():
     def loop_based(ovlp):
@@ -67,7 +73,8 @@ def einsum_based(ovlp):
         start = time.time()
         einsum_based(ovlps)
         t_einsum = time.time() - start
-        print(nwalkers, t_einsum/t_loop)
+        print(nwalkers, t_einsum / t_loop)
+
 
 def time_ghalf():
     def loop_based(a, b, out):
@@ -75,14 +82,14 @@ def loop_based(a, b, out):
             out[iw] = np.dot(b[iw], a[iw].T)
 
     def einsum_based(a, b, out):
-        out = np.einsum('wij,wmj->wim', b, a, optimize=True)
+        out = np.einsum("wij,wmj->wim", b, a, optimize=True)
 
     def dot_based(a, b, out):
         nw = a.shape[0]
         no = b.shape[1]
         nb = a.shape[1]
-        a_ = a.reshape((nw*nb, no))
-        b_ = b.reshape((nw*no,no))
+        a_ = a.reshape((nw * nb, no))
+        b_ = b.reshape((nw * no, no))
         out = np.dot(b_, a_.T)
 
     # Ghalf construction
@@ -99,7 +106,8 @@ def dot_based(a, b, out):
         start = time.time()
         dot_based(walkers, ovlps, gf)
         t_dot = time.time() - start
-        print(nwalkers, t_einsum/t_loop, t_dot/t_loop)
+        print(nwalkers, t_einsum / t_loop, t_dot / t_loop)
+
 
 def time_gfull():
     def loop_based(a, b, out):
@@ -107,7 +115,7 @@ def loop_based(a, b, out):
             out[iw] = np.dot(b.conj(), a[iw])
 
     def einsum_based(a, b, out):
-        out = np.einsum('mi,win->wmn', b.conj(), a, optimize=True)
+        out = np.einsum("mi,win->wmn", b.conj(), a, optimize=True)
 
     # Ghalf construction
     for nwalkers in range(1, 40, 5):
@@ -121,24 +129,25 @@ def einsum_based(a, b, out):
         start = time.time()
         einsum_based(ghalf, trial, gf)
         t_einsum = time.time() - start
-        print(nwalkers, t_einsum/t_loop)
+        print(nwalkers, t_einsum / t_loop)
+
 
 # Full GF test
 def time_routines():
     for nwalkers in range(1, 40, 5):
         wfn = get_random_nomsd(nocc, nocc, nao, ndet=1)
         h1e = np.random.random((nao, nao))
-        system = Generic(nelec=(nocc,nocc))
+        system = Generic(nelec=(nocc, nocc))
         nmo = nao
         chol = np.zeros((naux, nmo, nmo))
-        ham = HamGeneric(h1e=np.array([h1e,h1e]),
-                         chol=chol.reshape((naux,nmo*nmo)).T.copy(),
-                         ecore=0)
+        ham = HamGeneric(
+            h1e=np.array([h1e, h1e]), chol=chol.reshape((naux, nmo * nmo)).T.copy(), ecore=0
+        )
         if _gpu:
             ham.cast_to_cupy()
         trial = MultiSlater(system, ham, wfn)
-        trial.psia = trial.psi[0,:,:nocc].copy()
-        trial.psib = trial.psi[0,:,nocc:].copy()
+        trial.psia = trial.psi[0, :, :nocc].copy()
+        trial.psib = trial.psi[0, :, nocc:].copy()
         trial.psi = trial.psi[0]
         walkers = [SingleDetWalker(system, ham, trial) for _ in range(nwalkers)]
         walker_batch = SingleDetWalkerBatch(system, ham, trial, nwalkers)
@@ -147,10 +156,11 @@ def time_routines():
         loop = time.time() - start
         start = time.time()
         greens_function_single_det_batch(walker_batch, trial)
-        print(nwalkers, (time.time() - start)/loop)
+        print(nwalkers, (time.time() - start) / loop)
+
 
-if __name__ == '__main__':
-    tmp = np.dot(np.random.random((100,100)), np.eye(100))
+if __name__ == "__main__":
+    tmp = np.dot(np.random.random((100, 100)), np.eye(100))
     print(">>>> Overlap <<<<<")
     time_overlap()
     print(">>>> Dets <<<<<")
diff --git a/timing_scripts/unpack_GPU.py b/timing_scripts/unpack_GPU.py
index 81adcc3e..0274c527 100644
--- a/timing_scripts/unpack_GPU.py
+++ b/timing_scripts/unpack_GPU.py
@@ -8,55 +8,56 @@
 from ipie.utils.pack import pack_cholesky, pack_cholesky_fast, unpack_VHS_batch
 
 
-@cuda.jit('void(int32[:],int32[:],complex128[:,:],complex128[:,:,:])',device=False)
-def unpack_VHS_batch_gpu(idx_i,idx_j,VHS_packed,VHS):
+@cuda.jit("void(int32[:],int32[:],complex128[:,:],complex128[:,:,:])", device=False)
+def unpack_VHS_batch_gpu(idx_i, idx_j, VHS_packed, VHS):
     nwalkers = VHS.shape[0]
     nbsf = VHS.shape[1]
-    nut = round(nbsf *(nbsf+1)/2)
+    nut = round(nbsf * (nbsf + 1) / 2)
     pos = cuda.grid(1)
     pos1 = pos // nut
     pos2 = pos - pos1 * nut
-    if (pos1 < nwalkers and pos2 < nut):
-        VHS[pos1, idx_i[pos2],idx_j[pos2]] = VHS_packed[pos1,pos2]
-        VHS[pos1, idx_j[pos2],idx_i[pos2]] = VHS_packed[pos1,pos2]
+    if pos1 < nwalkers and pos2 < nut:
+        VHS[pos1, idx_i[pos2], idx_j[pos2]] = VHS_packed[pos1, pos2]
+        VHS[pos1, idx_j[pos2], idx_i[pos2]] = VHS_packed[pos1, pos2]
+
 
 nbsf = 800
 nwalkers = 100
-nchol = 4*nbsf
-nupper = int(nbsf*(nbsf+1)/2)
+nchol = 4 * nbsf
+nupper = int(nbsf * (nbsf + 1) / 2)
 
-xauxf = numpy.random.random((nchol,nwalkers)) + 1.j * numpy.random.random((nchol,nwalkers))
+xauxf = numpy.random.random((nchol, nwalkers)) + 1.0j * numpy.random.random((nchol, nwalkers))
 
 start_time = time.time()
-Lchol = numpy.random.randn(nbsf**2*nchol).reshape(nbsf,nbsf, nchol)
-Lchol_packed = numpy.zeros((nupper,nchol))
+Lchol = numpy.random.randn(nbsf**2 * nchol).reshape(nbsf, nbsf, nchol)
+Lchol_packed = numpy.zeros((nupper, nchol))
 end_time = time.time()
-#print("allocation: {}".format(end_time-start_time))
+# print("allocation: {}".format(end_time-start_time))
 
 start_time = time.time()
-Lchol = Lchol.transpose(0,1,2) + Lchol.transpose(1,0,2)
+Lchol = Lchol.transpose(0, 1, 2) + Lchol.transpose(1, 0, 2)
 end_time = time.time()
-#print("symmetrization: {}".format(end_time-start_time))
+# print("symmetrization: {}".format(end_time-start_time))
 
 idx = numpy.triu_indices(nbsf)
 
 start_time = time.time()
 
-pack_cholesky(idx[0],idx[1],Lchol_packed, Lchol)
+pack_cholesky(idx[0], idx[1], Lchol_packed, Lchol)
 
 end_time = time.time()
-#print("packing: {}".format(end_time-start_time))
+# print("packing: {}".format(end_time-start_time))
 
-Lchol = Lchol.reshape(nbsf**2,nchol)
+Lchol = Lchol.reshape(nbsf**2, nchol)
 start_time = time.time()
-VHS = Lchol.dot(xauxf.real) + 1.j * Lchol.dot(xauxf.imag)
+VHS = Lchol.dot(xauxf.real) + 1.0j * Lchol.dot(xauxf.imag)
 VHS = VHS.T.copy()
-VHS = VHS.reshape(nwalkers,nbsf,nbsf)
+VHS = VHS.reshape(nwalkers, nbsf, nbsf)
 end_time = time.time()
-print("original: {}".format(end_time-start_time))
+print("original: {}".format(end_time - start_time))
 
 start_time = time.time()
-VHS_packed = Lchol_packed.dot(xauxf.real) + 1.j * Lchol_packed.dot(xauxf.imag)
+VHS_packed = Lchol_packed.dot(xauxf.real) + 1.0j * Lchol_packed.dot(xauxf.imag)
 VHS_packed = VHS_packed.T.copy()
 VHS_packed_CPU = VHS_packed.copy()
 end_time = time.time()
@@ -64,14 +65,14 @@ def unpack_VHS_batch_gpu(idx_i,idx_j,VHS_packed,VHS):
 print("packed gemm (CPU): {}".format(tpacked_gemm))
 
 start_time = time.time()
-VHS_unpacked = numpy.zeros((nwalkers,nbsf,nbsf),dtype=numpy.complex128)
-unpack_VHS_batch(idx[0],idx[1],VHS_packed,VHS_unpacked)
+VHS_unpacked = numpy.zeros((nwalkers, nbsf, nbsf), dtype=numpy.complex128)
+unpack_VHS_batch(idx[0], idx[1], VHS_packed, VHS_unpacked)
 end_time = time.time()
-tunpacking = end_time-start_time
+tunpacking = end_time - start_time
 print("unpacking (CPU): {}".format(tunpacking))
 print("packed total: {}".format(tpacked_gemm + tunpacking))
 
-A = cupy.zeros((2,2),dtype=cupy.complex128)
+A = cupy.zeros((2, 2), dtype=cupy.complex128)
 B = A.dot(A)
 B = A.T.dot(A)
 B = A.T.copy().dot(A)
@@ -81,38 +82,38 @@ def unpack_VHS_batch_gpu(idx_i,idx_j,VHS_packed,VHS):
 Lchol_packed = cupy.array(Lchol_packed)
 xauxf = cupy.array(xauxf)
 
-VHS_packed = Lchol_packed.dot(xauxf.real) + 1.j * Lchol_packed.dot(xauxf.imag)
+VHS_packed = Lchol_packed.dot(xauxf.real) + 1.0j * Lchol_packed.dot(xauxf.imag)
 VHS_packed = VHS_packed.T.copy()
 cupy.cuda.Stream.null.synchronize()
 start_time = time.time()
-VHS_packed = Lchol_packed.dot(xauxf.real) + 1.j * Lchol_packed.dot(xauxf.imag)
+VHS_packed = Lchol_packed.dot(xauxf.real) + 1.0j * Lchol_packed.dot(xauxf.imag)
 VHS_packed = VHS_packed.T.copy()
 end_time = time.time()
 tpacked_gemm = end_time - start_time
 print("packed gemm (GPU, DP): {}".format(tpacked_gemm))
 
-#Lchol_packed = Lchol_packed.astype(numpy.float32)
-#xauxf = xauxf.astype(numpy.complex64)
-#VHS_packed = Lchol_packed.dot(xauxf.real) + 1.j * Lchol_packed.dot(xauxf.imag)
-#VHS_packed = VHS_packed.T.copy()
-#start_time = time.time()
-#VHS_packed = Lchol_packed.dot(xauxf.real) + 1.j * Lchol_packed.dot(xauxf.imag)
-#VHS_packed = VHS_packed.T.copy()
-#end_time = time.time()
-#tpacked_gemm = end_time - start_time
-#print("packed gemm (GPU, SP): {}".format(tpacked_gemm))
+# Lchol_packed = Lchol_packed.astype(numpy.float32)
+# xauxf = xauxf.astype(numpy.complex64)
+# VHS_packed = Lchol_packed.dot(xauxf.real) + 1.j * Lchol_packed.dot(xauxf.imag)
+# VHS_packed = VHS_packed.T.copy()
+# start_time = time.time()
+# VHS_packed = Lchol_packed.dot(xauxf.real) + 1.j * Lchol_packed.dot(xauxf.imag)
+# VHS_packed = VHS_packed.T.copy()
+# end_time = time.time()
+# tpacked_gemm = end_time - start_time
+# print("packed gemm (GPU, SP): {}".format(tpacked_gemm))
 
-VHS_unpacked = cupy.zeros((nwalkers,nbsf,nbsf),dtype=numpy.complex128)
+VHS_unpacked = cupy.zeros((nwalkers, nbsf, nbsf), dtype=numpy.complex128)
 
 threadsperblock = 512
-nut = round(nbsf *(nbsf+1)/2)
-blockspergrid = math.ceil(VHS.shape[0]*nut / threadsperblock)
-unpack_VHS_batch_gpu[blockspergrid, threadsperblock](idx_i,idx_j,VHS_packed,VHS_unpacked)
+nut = round(nbsf * (nbsf + 1) / 2)
+blockspergrid = math.ceil(VHS.shape[0] * nut / threadsperblock)
+unpack_VHS_batch_gpu[blockspergrid, threadsperblock](idx_i, idx_j, VHS_packed, VHS_unpacked)
 cupy.cuda.Stream.null.synchronize()
 start_time = time.time()
-unpack_VHS_batch_gpu[blockspergrid, threadsperblock](idx_i,idx_j,VHS_packed,VHS_unpacked)
+unpack_VHS_batch_gpu[blockspergrid, threadsperblock](idx_i, idx_j, VHS_packed, VHS_unpacked)
 end_time = time.time()
-print("unpacking (GPU): {}".format(end_time-start_time))
+print("unpacking (GPU): {}".format(end_time - start_time))
 
 diff = VHS - cupy.asnumpy(VHS_unpacked)
 print(numpy.max(numpy.abs(diff)))
diff --git a/tools/add_license.py b/tools/add_license.py
index 32afdaa5..40f159d8 100644
--- a/tools/add_license.py
+++ b/tools/add_license.py
@@ -1,5 +1,4 @@
-
-NOTICE="""
+NOTICE = """
 # Copyright 2022 The ipie Developers. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,6 +19,7 @@
 from pathlib import Path
 import subprocess
 
+
 def get_authors(file_path):
     authors = subprocess.check_output(f"git shortlog {file_path} -n -s --email".split())
     first_names = [name.decode("utf_8") for name in authors.split()[1::4]]
@@ -27,18 +27,19 @@ def get_authors(file_path):
     email = [name.decode("utf_8") for name in authors.split()[3::4]]
     return list(zip(first_names, last_names, email))
 
+
 files = [("bin/ipie", get_authors("bin/ipie"))]
-for path in Path('bin').rglob('*.py'):
+for path in Path("bin").rglob("*.py"):
     files.append((path, get_authors(path)))
-for path in Path('examples').rglob('*.py'):
+for path in Path("examples").rglob("*.py"):
     files.append((path, get_authors(path)))
-for path in Path('ipie').rglob('*.py'):
+for path in Path("ipie").rglob("*.py"):
     files.append((path, get_authors(path)))
-for path in Path('lib').rglob('*.c'):
+for path in Path("lib").rglob("*.c"):
     files.append((path, get_authors(path)))
-for path in Path('lib').rglob('*.h'):
+for path in Path("lib").rglob("*.h"):
     files.append((path, get_authors(path)))
-for path in Path('lib').rglob('*.py'):
+for path in Path("lib").rglob("*.py"):
     files.append((path, get_authors(path)))
 
 for file_path, names in files:
@@ -52,18 +53,20 @@ def get_authors(file_path):
         if len(names) > 0:
             num_authors = 0
             string = ""
-            for (f, l, e) in names:
+            for f, l, e in names:
                 if f in string:
                     continue
                 if ".com" not in e:
                     _email = ""
-                    string += '{:s} {:s}\n# {:9s}'.format(f, l, "")
+                    string += "{:s} {:s}\n# {:9s}".format(f, l, "")
                 else:
                     _email = e
-                    string += '{:s} {:s} {:s}\n# {:9s}'.format(f, l, _email, "")
+                    string += "{:s} {:s} {:s}\n# {:9s}".format(f, l, _email, "")
                 num_authors += 1
             authors = """#
 # Author{:s}: {}
-""".format("s" if num_authors > 1 else "", string.strip())
+""".format(
+                "s" if num_authors > 1 else "", string.strip()
+            )
         file.seek(0, 0)
-        file.write(NOTICE + authors + '\n' + file_data)
+        file.write(NOTICE + authors + "\n" + file_data)
diff --git a/tools/convert_phase.py b/tools/convert_phase.py
index bbaaafcf..f1658c83 100644
--- a/tools/convert_phase.py
+++ b/tools/convert_phase.py
@@ -1,7 +1,6 @@
 import h5py
 import numpy
-from pie.utils.io import (write_qmcpack_dense, write_qmcpack_sparse,
-                          write_qmcpack_wfn)
+from pie.utils.io import write_qmcpack_dense, write_qmcpack_sparse, write_qmcpack_wfn
 from pie.utils.linalg import modified_cholesky
 
 f = h5py.File("2FeIII_nat.h5", "r")
diff --git a/tools/fcidump_to_afqmc.py b/tools/fcidump_to_afqmc.py
index 67a3912d..9aad9a72 100755
--- a/tools/fcidump_to_afqmc.py
+++ b/tools/fcidump_to_afqmc.py
@@ -1,4 +1,3 @@
-
 # Copyright 2022 The ipie Developers. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/tools/pyscf/pyscf_to_ipie.py b/tools/pyscf/pyscf_to_ipie.py
index 23ae838a..3c26ab56 100644
--- a/tools/pyscf/pyscf_to_ipie.py
+++ b/tools/pyscf/pyscf_to_ipie.py
@@ -5,14 +5,10 @@
 import h5py
 import numpy
 
-from ipie.utils.from_pyscf import (
-        gen_ipie_input_from_pyscf_chk,
-        load_from_pyscf_chkfile
-        )
+from ipie.utils.from_pyscf import gen_ipie_input_from_pyscf_chk, load_from_pyscf_chkfile
 from ipie.utils.io import write_json_input_file
 
 
-
 def parse_args(args):
     """Parse command-line arguments.
 
@@ -89,12 +85,18 @@ def parse_args(args):
         help="Number of core orbitals to freeze.",
     )
     parser.add_argument(
-        "-o", "--ortho-ao", dest="oao", action="store_true", help="Whether to do"
-        " use orthogonalized AO basis."
+        "-o",
+        "--ortho-ao",
+        dest="oao",
+        action="store_true",
+        help="Whether to do" " use orthogonalized AO basis.",
     )
     parser.add_argument(
-        "--lin-dep", dest="lin_dep", type=float, default=0, help="Linear "
-        "dependency threshold for canonical orthogonalization."
+        "--lin-dep",
+        dest="lin_dep",
+        type=float,
+        default=0,
+        help="Linear " "dependency threshold for canonical orthogonalization.",
     )
     parser.add_argument(
         "-v", "--verbose", dest="verbose", action="store_true", help="Verbose output."
@@ -131,15 +133,17 @@ def main(args):
         linear_dep_thresh=options.lin_dep,
     )
     scf_data = load_from_pyscf_chkfile(options.input_scf)
-    nelec_mol = scf_data['mol'].nelec
+    nelec_mol = scf_data["mol"].nelec
     nfzn = options.num_frozen_core
-    nelec_sim = (nelec_mol[0]-nfzn, nelec_mol[1]-nfzn)
+    nelec_sim = (nelec_mol[0] - nfzn, nelec_mol[1] - nfzn)
     write_json_input_file(
-        options.json_input, options.output, options.wfn, nelec_sim,
+        options.json_input,
+        options.output,
+        options.wfn,
+        nelec_sim,
         estimates_filename=options.est,
     )
 
 
 if __name__ == "__main__":
-
     main(sys.argv[1:])
diff --git a/tools/reblock.py b/tools/reblock.py
index c3759503..9d48cc7b 100755
--- a/tools/reblock.py
+++ b/tools/reblock.py
@@ -15,11 +15,7 @@
 sys.path.append(os.path.join(_script_dir, "analysis"))
 import glob
 
-from ipie.analysis.blocking import (
-        analyse_estimates,
-        reblock_minimal,
-        average_fp
-        )
+from ipie.analysis.blocking import analyse_estimates, reblock_minimal, average_fp
 from ipie.analysis.extraction import extract_data_sets
 
 
@@ -52,8 +48,7 @@ def parse_args(args):
         type=int,
         dest="block_start",
         default=0,
-        help="Simulation block to start blocking analysis from (equilibration "
-        "time): Default 0",
+        help="Simulation block to start blocking analysis from (equilibration " "time): Default 0",
     )
     parser.add_argument(
         "-m",