diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
index 8a9feac84..1ef61cfd7 100644
--- a/.github/workflows/CI.yaml
+++ b/.github/workflows/CI.yaml
@@ -5,12 +5,14 @@ on:
   pull_request:
     branches:
       - "main"
+      - "0.10.*"
   schedule:
     # nightly tests
     - cron: "0 0 * * *"
   push:
     branches:
       - main
+      - "0.10.x"
 
 concurrency:
   group: "${{ github.workflow }}-${{ github.ref }}"
diff --git a/.github/workflows/self-hosted-gpu-test.yml b/.github/workflows/self-hosted-gpu-test.yml
index 55986fa38..4c3b63413 100644
--- a/.github/workflows/self-hosted-gpu-test.yml
+++ b/.github/workflows/self-hosted-gpu-test.yml
@@ -3,6 +3,7 @@ on:
   push:
     branches:
       - main
+      - "0.10.x"
   workflow_dispatch:
   schedule:
     # nightly tests
diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml
index 25fe5685d..a482b0ccd 100644
--- a/devtools/conda-envs/test_env.yaml
+++ b/devtools/conda-envs/test_env.yaml
@@ -30,11 +30,11 @@ dependencies:
   - openmmforcefields >=0.9.0
   - openmmtools # may need to sort out ambermini/ambertools/parmed dependencies
   - openmoltools # may need to sort out ambermini/ambertools/parmed dependencies (we don't want ambertools)
-  - parmed # may need to sort out ambermini/ambertools/parmed dependencies
+  - parmed
   - pdbfixer
   - pip
   - progressbar2
-  - pymbar <4
+  - pymbar >=4
   - pytest
   - pytest-attrib
   - pytest-cov
diff --git a/perses/analysis/analysis.py b/perses/analysis/analysis.py
index 4a05a62d6..0c0b7d769 100644
--- a/perses/analysis/analysis.py
+++ b/perses/analysis/analysis.py
@@ -15,7 +15,7 @@
 import numpy as np
 import itertools
 
-import pymbar
+from openmmtools.multistate.pymbar import _pymbar_bar
 from perses import storage
 import seaborn as sns
 
@@ -203,7 +203,7 @@ def get_free_energies(self, environment):
 
                 resampled_w_F = np.random.choice(w_F, len(w_F), replace=True)
                 resampled_w_R = np.random.choice(w_R, len(w_R), replace=True)
-                [df, ddf] = pymbar.BAR(resampled_w_F, resampled_w_R)
+                [df, ddf] = _pymbar_bar(resampled_w_F, resampled_w_R)
                 bootstrapped_bar[i] = df
 
             free_energies[state_pair] = [np.mean(bootstrapped_bar), np.std(bootstrapped_bar)]
diff --git a/perses/analysis/fah_analysis.py b/perses/analysis/fah_analysis.py
index 0025e9688..83ac0bc62 100644
--- a/perses/analysis/fah_analysis.py
+++ b/perses/analysis/fah_analysis.py
@@ -2,7 +2,7 @@
 import numpy as np
 import pandas as pd
 import seaborn as sns
-from pymbar import BAR
+from openmmtools.multistate.pymbar import _pymbar_bar
 import matplotlib.pyplot as plt
 import seaborn
 from simtk.openmm import unit
@@ -160,7 +160,7 @@ def _bootstrap_BAR(run, phase, gen_id, n_bootstrap):
             f"less than {min_num_work_values} reverse work values (got {len(r_works)})"
         )
 
-    fe, err = bootstrap_uncorrelated(BAR, n_iters=n_bootstrap)(
+    fe, err = bootstrap_uncorrelated(_pymbar_bar, n_iters=n_bootstrap)(
         f_works.values, r_works.values
     )
 
@@ -227,7 +227,7 @@ def _process_phase(i, phase):
         axes[i].set_title(phase)
 
         # TODO add bootstrapping here
-        d[f"{phase}_fes"] = BAR(np.asarray(all_forward), np.asarray(all_reverse))
+        d[f"{phase}_fes"] = _pymbar_bar(np.asarray(all_forward), np.asarray(all_reverse))
 
     for i, phase in enumerate(projects.keys()):
         try:
diff --git a/perses/annihilation/relative.py b/perses/annihilation/relative.py
index 3f69f07aa..f5c8b205c 100644
--- a/perses/annihilation/relative.py
+++ b/perses/annihilation/relative.py
@@ -3309,28 +3309,26 @@ def _is_solvent(particle_index, positive_ion_name="NA", negative_ion_name="CL",
             else:
                 return False
 
-        assert type(particles) in [type(set()), int], f"`particles` must be an integer or a set, got {type(particles)}."
-
         if isinstance(particles, int):
-            rest_id = [0, 1, 0] # Set the default scale_id to nonrest solute
+            rest_id = [0, 1, 0] # Set the default rest_id to nonrest solute
             if not self._rest_region:
-                return rest_id # If there are no rest regions, set everything as nonrest_solute bc these atoms are not scaled
+                return rest_id
             else:
-                if particles in self._rest_region: # Here, particles is a single int
+                if particles in self._rest_region: # The particle is in the REST region
                     rest_id = [1, 0, 0]
-                elif _is_solvent(particles): # If the particle is not in a rest region, check if it is a solvent atom
+                elif _is_solvent(particles): # The particle is a non-rest solvent atom
                     rest_id = [0, 0, 1]
                 return rest_id
 
         elif isinstance(particles, set):
-            rest_id = [0, 0, 1] # Set the default scale_id to nonrest solute
+            rest_id = [0, 0, 1] # Set the default rest_id to nonrest solute
            if not self._rest_region:
-                return rest_id # If there are no scale regions, set everything as nonrest bc these atoms are not scaled
+                return rest_id
             else:
-                if particles.intersection(self._rest_region) != set(): # At least one of the particles is in the idx_th rest region
-                    if particles.issubset(self._rest_region): # Then this term is wholly in the rest region
+                if particles.intersection(self._rest_region) != set(): # At least one of the particles is in the rest region
+                    if particles.issubset(self._rest_region): # All particles are in the rest region
                         rest_id = [1, 0, 0]
-                    else: # It is inter region
+                    else: # At least one (but not all) particles are in the rest region
                         rest_id = [0, 1, 0]
                 return rest_id
 
@@ -3440,8 +3438,8 @@ def _create_bond_force(self):
         custom_bond_force.addPerBondParameter('is_unique_new')
 
         # Add per-bond parameters for defining energy
-        custom_bond_force.addPerBondParameter('length_old') # old bond length
-        custom_bond_force.addPerBondParameter('length_new') # new bond length
+        custom_bond_force.addPerBondParameter('length_old') # old equilibrium bond length
+        custom_bond_force.addPerBondParameter('length_new') # new equilibrium bond length
         custom_bond_force.addPerBondParameter('K_old') # old spring constant
         custom_bond_force.addPerBondParameter('K_new') # new spring constant
 
@@ -3494,7 +3492,7 @@ def _copy_bonds(self):
             rest_id = self.get_rest_identifier(idx_set)
             alch_id, atom_class = self.get_alch_identifier(idx_set)
 
-            # Get the old terms and if they exist, the old terms
+            # Get the old terms and if they exist, the new terms
             old_bond_idx, r0_old, k_old = old_term_collector[hybrid_index_pair]
             try:
                 new_bond_idx, r0_new, k_new = new_term_collector[hybrid_index_pair]
@@ -3586,8 +3584,8 @@ def _create_angle_force(self):
         custom_angle_force.addPerAngleParameter('is_unique_new')
 
         # Add per-angle parameters for defining energy
-        custom_angle_force.addPerAngleParameter('theta0_old') # old angle length
-        custom_angle_force.addPerAngleParameter('theta0_new') # new angle length
+        custom_angle_force.addPerAngleParameter('theta0_old') # old equilibrium angle
+        custom_angle_force.addPerAngleParameter('theta0_new') # new equilibrium angle
         custom_angle_force.addPerAngleParameter('K_old') # old spring constant
         custom_angle_force.addPerAngleParameter('K_new') # new spring constant
 
diff --git a/perses/app/cli.py b/perses/app/cli.py
index a94cebf9c..be003cc66 100644
--- a/perses/app/cli.py
+++ b/perses/app/cli.py
@@ -88,12 +88,14 @@ def _test_platform(platform_name):
 def cli(yaml, platform_name, override):
     """Run perses relative free energy calculation."""
     from perses.app.setup_relative_calculation import run
+    from perses import __version__ as perses_version
     logging.basicConfig(
         format="%(asctime)s %(levelname)-8s %(message)s",
         level=LOGLEVEL,
         datefmt="%Y-%m-%d %H:%M:%S",
     )
     click.echo(click.style(percy, fg="bright_magenta"))
+    click.echo(f"Running perses version: {perses_version}")
     if override:
         click.echo("✍️ \t Overrides used")
     click.echo("🕵️\t Checking OpenEye license")
diff --git a/perses/app/relative_hydration.py b/perses/app/relative_hydration.py
index b0b29cc89..a92d20608 100644
--- a/perses/app/relative_hydration.py
+++ b/perses/app/relative_hydration.py
@@ -108,11 +108,11 @@ def check_alchemical_hybrid_elimination_bar(topology_proposal, old_positions, ne
         bar.update(i)
 
     del reverse_context, reverse_integrator
-    from pymbar import BAR
-    [df, ddf] = BAR(w_f, w_r)
+    from openmmtools.multistate.pymbar import _pymbar_bar
+    [df, ddf] = _pymbar_bar(w_f, w_r)
     print("df = %12.6f +- %12.5f kT" % (df, ddf))
 
 
 if __name__=="__main__":
     topology_proposal, old_positions, new_positions = generate_solvated_hybrid_test_topology()
-    check_alchemical_hybrid_elimination_bar(topology_proposal, old_positions, new_positions, ncmc_nsteps=100, n_iterations=500)
\ No newline at end of file
+    check_alchemical_hybrid_elimination_bar(topology_proposal, old_positions, new_positions, ncmc_nsteps=100, n_iterations=500)
diff --git a/perses/app/relative_setup.py b/perses/app/relative_setup.py
index 5d5e4b80f..021965ed5 100644
--- a/perses/app/relative_setup.py
+++ b/perses/app/relative_setup.py
@@ -9,7 +9,7 @@
 
 from openmmtools.states import ThermodynamicState, CompoundThermodynamicState, SamplerState
 
-import pymbar
+from openmmtools.multistate.pymbar import _pymbar_bar, _pymbar_exp, detect_equilibration
 from cloudpathlib import AnyPath
 import simtk.openmm as openmm
 import simtk.openmm.app as app
@@ -1717,11 +1717,11 @@ def compute_sMC_free_energy(self):
         """
         for _direction, works in self._nonequilibrium_cumulative_work.items():
             if works is not None:
-                self.dg_EXP[_direction] = pymbar.EXP(works[:,-1])
+                self.dg_EXP[_direction] = _pymbar_exp(works[:,-1])
 
         if all(work is not None for work in self._nonequilibrium_cumulative_work.values()):
             #then we can compute a BAR estimate
-            self.dg_BAR = pymbar.BAR(self._nonequilibrium_cumulative_work['forward'][:,-1], self._nonequilibrium_cumulative_work['reverse'][:,-1])
+            self.dg_BAR = _pymbar_bar(self._nonequilibrium_cumulative_work['forward'][:,-1], self._nonequilibrium_cumulative_work['reverse'][:,-1])
 
 
     def attempt_resample(self, observable = 'ESS', resampling_method = 'multinomial', resample_observable_threshold = 0.5):
@@ -2152,7 +2152,7 @@ def _adjust_for_correlation(self, timeseries_array: np.array):
             Neff_max : float
                 Max number of uncorrelated samples
         """
-        [t0, g, Neff_max] = pymbar.timeseries.detectEquilibration(timeseries_array)
+        [t0, g, Neff_max] = detect_equilibration(timeseries_array)
 
         return timeseries_array[t0:], g, Neff_max
 
@@ -2203,11 +2203,11 @@ def _endpoint_perturbations(self, direction = None, num_subsamples = 100):
             self._nonalchemical_reduced_potential_differences[start_lambda] = np.array([nonalchemical_reduced_potential_differences[i] for i in nonalch_full_uncorrelated_indices])
 
             #now to bootstrap results
-            alchemical_exp_results = np.array([pymbar.EXP(np.random.choice(self._alchemical_reduced_potential_differences[_direction], size = (len(self._alchemical_reduced_potential_differences[_direction]))), compute_uncertainty=False) for _ in range(num_subsamples)])
+            alchemical_exp_results = np.array([_pymbar_exp(np.random.choice(self._alchemical_reduced_potential_differences[_direction], size = (len(self._alchemical_reduced_potential_differences[_direction]))), compute_uncertainty=False) for _ in range(num_subsamples)])
             self._EXP[_direction] = (np.average(alchemical_exp_results), np.std(alchemical_exp_results)/np.sqrt(num_subsamples))
             _logger.debug(f"alchemical exp result for {_direction}: {self._EXP[_direction]}")
 
-            nonalchemical_exp_results = np.array([pymbar.EXP(np.random.choice(self._nonalchemical_reduced_potential_differences[start_lambda], size = (len(self._nonalchemical_reduced_potential_differences[start_lambda]))), compute_uncertainty=False) for _ in range(num_subsamples)])
+            nonalchemical_exp_results = np.array([_pymbar_exp(np.random.choice(self._nonalchemical_reduced_potential_differences[start_lambda], size = (len(self._nonalchemical_reduced_potential_differences[start_lambda]))), compute_uncertainty=False) for _ in range(num_subsamples)])
             self._EXP[start_lambda] = (np.average(nonalchemical_exp_results), np.std(nonalchemical_exp_results)/np.sqrt(num_subsamples))
             _logger.debug(f"nonalchemical exp result for {start_lambda}: {self._EXP[start_lambda]}")
 
@@ -2223,7 +2223,7 @@ def _alchemical_free_energy(self, num_subsamples = 100):
         work_subsamples = {'forward': [np.random.choice(self._nonequilibrium_cum_work['forward'], size = (len(self._nonequilibrium_cum_work['forward']))) for _ in range(num_subsamples)],
                            'reverse': [np.random.choice(self._nonequilibrium_cum_work['reverse'], size = (len(self._nonequilibrium_cum_work['reverse']))) for _ in range(num_subsamples)]}
 
-        bar_estimates = np.array([pymbar.BAR(forward_sample, reverse_sample, compute_uncertainty=False) for forward_sample, reverse_sample in zip(work_subsamples['forward'], work_subsamples['reverse'])])
+        bar_estimates = np.array([_pymbar_bar(forward_sample, reverse_sample, compute_uncertainty=False) for forward_sample, reverse_sample in zip(work_subsamples['forward'], work_subsamples['reverse'])])
         df, ddf = np.average(bar_estimates), np.std(bar_estimates) / np.sqrt(num_subsamples)
         self._BAR = [df, ddf]
         return (df, ddf)
diff --git a/perses/dispersed/smc.py b/perses/dispersed/smc.py
index e2850350a..5958498e2 100644
--- a/perses/dispersed/smc.py
+++ b/perses/dispersed/smc.py
@@ -10,7 +10,7 @@
 from perses.annihilation.lambda_protocol import LambdaProtocol
 from perses.annihilation.lambda_protocol import RelativeAlchemicalState
 import random
-import pymbar
+from openmmtools.multistate.pymbar import _pymbar_bar, _pymbar_exp
 from perses.dispersed.parallel import Parallelism
 # Instantiate logger
 _logger = logging.getLogger("sMC")
@@ -726,10 +726,10 @@ def compute_sMC_free_energy(self, cumulative_work_dict):
         self.dg_EXP = {}
         for _direction, _lst in cumulative_work_dict.items():
             self.cumulative_work[_direction] = _lst
-            self.dg_EXP[_direction] = np.array([pymbar.EXP(_lst[:,i]) for i in range(_lst.shape[1])])
+            self.dg_EXP[_direction] = np.array([_pymbar_exp(_lst[:,i]) for i in range(_lst.shape[1])])
             _logger.debug(f"cumulative_work for {_direction}: {self.cumulative_work[_direction]}")
         if len(list(self.cumulative_work.keys())) == 2:
-            self.dg_BAR = pymbar.BAR(self.cumulative_work['forward'][:, -1], self.cumulative_work['reverse'][:, -1])
+            self.dg_BAR = _pymbar_bar(self.cumulative_work['forward'][:, -1], self.cumulative_work['reverse'][:, -1])
 
 
     def minimize_sampler_states(self):
diff --git a/perses/dispersed/utils.py b/perses/dispersed/utils.py
index 36b1e833c..3f99c9c47 100644
--- a/perses/dispersed/utils.py
+++ b/perses/dispersed/utils.py
@@ -306,10 +306,10 @@ def compute_timeseries(reduced_potentials):
         uncorrelated indices
 
     """
-    from pymbar import timeseries
-    t0, g, Neff_max = timeseries.detectEquilibration(reduced_potentials) #computing indices of uncorrelated timeseries
+    from openmmtools.multistate.pymbar import detect_equilibration, subsample_correlated_data
+    t0, g, Neff_max = detect_equilibration(reduced_potentials) #computing indices of uncorrelated timeseries
     A_t_equil = reduced_potentials[t0:]
-    uncorrelated_indices = timeseries.subsampleCorrelatedData(A_t_equil, g=g)
+    uncorrelated_indices = subsample_correlated_data(A_t_equil, g=g)
     A_t = A_t_equil[uncorrelated_indices]
     full_uncorrelated_indices = [i+t0 for i in uncorrelated_indices]
 
diff --git a/perses/tests/test_examples.py b/perses/tests/test_examples.py
deleted file mode 100644
index 95e46344a..000000000
--- a/perses/tests/test_examples.py
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/usr/bin/env python
-
-# ======================================================================
-# MODULE DOCSTRING
-# ======================================================================
-
-"""
-Test that the examples in the repo run without errors.
-"""
-
-# ======================================================================
-# GLOBAL IMPORTS
-# ======================================================================
-
-import pathlib
-import pytest
-import subprocess
-from perses.tests.utils import enter_temp_directory
-
-ROOT_DIR_PATH = pathlib.Path(__file__).joinpath("../../../").resolve()
-SLOW_EXAMPLES_KEYWORDS = ("barnase-barstar", "kinase-neq-switching")
-NOT_EXAMPLES_KEYWORDS = ("analyze-benchmark", "moonshot-mainseries")
-
-
-def run_script_file(file_path, cmd_args=None):
-    """Run through the shell a python script."""
-    with enter_temp_directory():
-        cmd = ["python", file_path]
-        print(cmd)
-        # Extend cmd list with given cmd_args
-        if cmd_args:
-            cmd.extend(cmd_args)
-        try:
-            subprocess.run(cmd, capture_output=True, check=True)
-        except subprocess.CalledProcessError as error:
-            raise Exception(f"Example {file_path} failed. STDERR: {error.stderr.decode()}")
-
-
-def find_example_scripts():
-    """Find all Python scripts, excluding Jupyter notebooks, in the examples folder.
-    Returns
-    -------
-    example_file_paths : List[str]
-        List of full paths to python scripts to execute.
-    """
-    examples_dir_path = ROOT_DIR_PATH.joinpath("examples")
-
-    example_file_paths = []
-    for example_file_path in examples_dir_path.glob("*/*.py"):
-        # TODO: find a better way to mark slow examples
-        example_posix_path = example_file_path.as_posix()
-        # mark slow examples
-        if any(example in example_posix_path for example in SLOW_EXAMPLES_KEYWORDS):
-            example_posix_path = pytest.param(example_posix_path, marks=pytest.mark.slow)
-        # mark non-examples
-        elif any(example in example_posix_path for example in NOT_EXAMPLES_KEYWORDS):
-            example_posix_path = pytest.param(example_posix_path, marks=pytest.mark.skip("not an example"))
-        example_file_paths.append(example_posix_path)
-
-    return example_file_paths
-
-
-# ======================================================================
-# TESTS
-# ======================================================================
-@pytest.mark.parametrize("example_file_path", find_example_scripts())
-def test_examples(example_file_path):
-    """Test that the example run without errors."""
-    run_script_file(example_file_path)
diff --git a/perses/tests/test_relative.py b/perses/tests/test_relative.py
index 37223df8a..bed3cd7eb 100644
--- a/perses/tests/test_relative.py
+++ b/perses/tests/test_relative.py
@@ -19,9 +19,8 @@
 from openmmtools.integrators import LangevinIntegrator
 from unittest import skipIf
 
-import pymbar.timeseries as timeseries
+from openmmtools.multistate.pymbar import _pymbar_exp, detect_equilibration
 import pytest
-import pymbar
 
 
 running_on_github_actions = os.environ.get('GITHUB_ACTIONS', None) == 'true'
@@ -100,9 +99,9 @@ def calculate_cross_variance(all_results):
     non_b = all_results[2]
     hybrid_b = all_results[3]
     print('CROSS VALIDATION')
-    [df, ddf] = pymbar.EXP(non_a - hybrid_b)
+    [df, ddf] = _pymbar_exp(non_a - hybrid_b)
     print('df: {}, ddf: {}'.format(df, ddf))
-    [df, ddf] = pymbar.EXP(non_b - hybrid_a)
+    [df, ddf] = _pymbar_exp(non_b - hybrid_a)
     print('df: {}, ddf: {}'.format(df, ddf))
     return
 
@@ -395,9 +394,9 @@ def run_endpoint_perturbation(lambda_thermodynamic_state, nonalchemical_thermody
         md.Trajectory(nonalchemical_trajectory / unit.nanometers, nonalchemical_mdtraj_topology).save(f'nonalchemical{lambda_index}.pdb')
 
     # Analyze data and return results
-    [t0, g, Neff_max] = timeseries.detectEquilibration(w)
+    [t0, g, Neff_max] = detect_equilibration(w)
     w_burned_in = w[t0:]
-    [df, ddf] = pymbar.EXP(w_burned_in)
+    [df, ddf] = _pymbar_exp(w_burned_in)
     ddf_corrected = ddf * np.sqrt(g)
     results = [df, ddf_corrected, t0, Neff_max]
 
diff --git a/perses/tests/utils.py b/perses/tests/utils.py
index 51da033a5..152f1cb50 100644
--- a/perses/tests/utils.py
+++ b/perses/tests/utils.py
@@ -504,7 +504,7 @@ def validate_rjmc_work_variance(top_prop, positions, geometry_method = 0, num_it
     md_steps: int
         number of md_steps to run in each num_iteration
     compute_timeseries = bool (default False)
-        whether to use pymbar detectEquilibration and subsampleCorrelated data from the MD run (the potential energy is the data)
+        whether to use pymbar detect_equilibration and subsample_correlated_data from the MD run (the potential energy is the data)
     prespecified_conformers = None or unit.Quantity(np.array([num_iterations, system.getNumParticles(), 3]), unit = unit.nanometers)
         whether to input a unit.Quantity of conformers and bypass the conformer_generation/pymbar stage; None will default conduct this phase
 
@@ -562,9 +562,9 @@ def validate_rjmc_work_variance(top_prop, positions, geometry_method = 0, num_it
 
     if compute_timeseries:
         print(f"computing production and data correlation")
-        from pymbar import timeseries
-        t0, g, Neff = timeseries.detectEquilibration(rps)
-        series = timeseries.subsampleCorrelatedData(np.arange(t0, num_iterations), g = g)
+        from openmmtools.multistate.pymbar import detect_equilibration, subsample_correlated_data
+        t0, g, Neff = detect_equilibration(rps)
+        series = subsample_correlated_data(np.arange(t0, num_iterations), g = g)
         print(f"production starts at index {t0} of {num_iterations}")
         print(f"the number of effective samples is {Neff}")
         indices = t0 + series