diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 9c452ab1..af9f2c9a 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -64,7 +64,7 @@ jobs:
           MPLBACKEND: agg

      - name: Codecov
-        uses: codecov/codecov-action@v3
+        uses: codecov/codecov-action@v4
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
           name: codecov-${{ matrix.os }}-py${{ matrix.python-version }}
diff --git a/CHANGES b/CHANGES
index bc754f75..6734d315 100644
--- a/CHANGES
+++ b/CHANGES
@@ -14,6 +14,19 @@ The rules for this file:
 ------------------------------------------------------------------------------

+**/**/2024 xiki-tempula
+
+  * 2.3.0
+
+Changes
+  - Default value for the keyword argument `initial_f_k` of the MBAR estimator
+    was changed to "BAR" (run an initial BAR calculation before MBAR) instead
+    of `None` (start from all zeros), as this change provides a sizable speedup (PR #357)
+
+Enhancements
+  - `BAR` result is used as the initial guess for the `MBAR` estimator. (PR #357)
+  - `forward_backward_convergence` uses the result from the previous step as
+    the initial guess for the next step. (PR #357)
+
 06/04/2024 hl2500, xiki-tempula

   * 2.2.0
diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py
index 4c2d9349..65f15b86 100644
--- a/src/alchemlyb/convergence/convergence.py
+++ b/src/alchemlyb/convergence/convergence.py
@@ -82,7 +82,7 @@ def forward_backward_convergence(df_list, estimator="MBAR", num=10, **kwargs):
         raise ValueError(msg)
     else:
         # select estimator class by name
-        estimator_fit = estimators_dispatch[estimator](**kwargs).fit
+        my_estimator = estimators_dispatch[estimator](**kwargs)
         logger.info(f"Use {estimator} estimator for convergence analysis.")

     logger.info("Begin forward analysis")
@@ -94,7 +94,9 @@ def forward_backward_convergence(df_list, estimator="MBAR", num=10, **kwargs):
         for data in df_list:
             sample.append(data[: len(data) // num * i])
         sample = concat(sample)
-        result = estimator_fit(sample)
+        result = my_estimator.fit(sample)
+        if estimator == "MBAR":
+            my_estimator.initial_f_k = result.delta_f_.iloc[0, :]
         forward_list.append(result.delta_f_.iloc[0, -1])
         if estimator.lower() == "bar":
             error = np.sqrt(
@@ -121,7 +123,9 @@ def forward_backward_convergence(df_list, estimator="MBAR", num=10, **kwargs):
         for data in df_list:
             sample.append(data[-len(data) // num * i :])
         sample = concat(sample)
-        result = estimator_fit(sample)
+        result = my_estimator.fit(sample)
+        if estimator == "MBAR":
+            my_estimator.initial_f_k = result.delta_f_.iloc[0, :]
         backward_list.append(result.delta_f_.iloc[0, -1])
         if estimator.lower() == "bar":
             error = np.sqrt(
diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py
index 33d0a3fd..e0ab594a 100644
--- a/src/alchemlyb/estimators/mbar_.py
+++ b/src/alchemlyb/estimators/mbar_.py
@@ -1,7 +1,12 @@
+from __future__ import annotations
+from typing import Literal
+
+import numpy as np
 import pandas as pd
 import pymbar
 from sklearn.base import BaseEstimator

+from . import BAR
 from .base import _EstimatorMixOut


@@ -17,9 +22,18 @@ class MBAR(BaseEstimator, _EstimatorMixOut):
     relative_tolerance : float, optional
         Set to determine the relative tolerance convergence criteria.

-    initial_f_k : np.ndarray, float, shape=(K), optional
-        Set to the initial dimensionless free energies to use as a
-        guess (default None, which sets all :math:`f_k = 0`).
+    initial_f_k : np.ndarray, float, shape=(K), optional, or the string "BAR"
+        When `isinstance(initial_f_k, np.ndarray)`, `initial_f_k` will be used
+        as the initial guess for the MBAR estimator; it should contain
+        dimensionless free energies.
+        When `initial_f_k` is ``None``, all initial free energies are set to 0.
+        When `initial_f_k` is set to "BAR", a BAR calculation will be done
+        first and its result is used as the initial guess (default).
+
+        .. versionchanged:: 2.3.0
+           The new default is now "BAR" as it provides a substantial speedup
+           over the previous default ``None``.
+
     method : str, optional, default="robust"

         The optimization routine to use. This can be any of the methods
@@ -71,14 +85,19 @@ def __init__(
         self,
         maximum_iterations=10000,
         relative_tolerance=1.0e-7,
-        initial_f_k=None,
+        initial_f_k: np.ndarray | Literal["BAR"] | None = "BAR",
         method="robust",
         n_bootstraps=0,
         verbose=False,
     ):
         self.maximum_iterations = maximum_iterations
         self.relative_tolerance = relative_tolerance
-        self.initial_f_k = initial_f_k
+        if isinstance(initial_f_k, str) and initial_f_k != "BAR":
+            raise ValueError(
+                f"Only `BAR` is supported as string input to `initial_f_k`. Got ({initial_f_k})."
+            )
+        else:
+            self.initial_f_k = initial_f_k
         self.method = method
         self.verbose = verbose
         self.n_bootstraps = n_bootstraps
@@ -108,13 +127,24 @@ def fit(self, u_nk):
         ]
         self._states_ = u_nk.columns.values.tolist()

+        if isinstance(self.initial_f_k, str) and self.initial_f_k == "BAR":
+            bar = BAR(
+                maximum_iterations=self.maximum_iterations,
+                relative_tolerance=self.relative_tolerance,
+                verbose=self.verbose,
+            )
+            bar.fit(u_nk)
+            initial_f_k = bar.delta_f_.iloc[0, :]
+        else:
+            initial_f_k = self.initial_f_k
+
         self._mbar = pymbar.MBAR(
             u_nk.T,
             N_k,
             maximum_iterations=self.maximum_iterations,
             relative_tolerance=self.relative_tolerance,
             verbose=self.verbose,
-            initial_f_k=self.initial_f_k,
+            initial_f_k=initial_f_k,
             solver_protocol=self.method,
             n_bootstraps=self.n_bootstraps,
         )
diff --git a/src/alchemlyb/tests/test_fep_estimators.py b/src/alchemlyb/tests/test_fep_estimators.py
index a956dbbc..46487c18 100644
--- a/src/alchemlyb/tests/test_fep_estimators.py
+++ b/src/alchemlyb/tests/test_fep_estimators.py
@@ -2,6 +2,7 @@

 """

+import numpy as np
 import pytest

 import alchemlyb
@@ -151,3 +152,18 @@ def test_bootstrap(gmx_benzene_Coulomb_u_nk):

     assert mbar_bootstrap_mean == mbar_mean
     assert mbar_bootstrap_err != mbar_err
+
+
+def test_wrong_initial_f_k():
+    with pytest.raises(
+        ValueError, match="Only `BAR` is supported as string input to `initial_f_k`"
+    ):
+        MBAR(initial_f_k="aaa")
+
+
+@pytest.mark.parametrize("initial_f_k", ["BAR", None])
+def test_initial_f_k(gmx_benzene_Coulomb_u_nk, initial_f_k):
+    u_nk = alchemlyb.concat(gmx_benzene_Coulomb_u_nk)
+    mbar = MBAR(initial_f_k=initial_f_k)
+    mbar.fit(u_nk)
+    assert np.isclose(mbar.delta_f_.loc[0.00, 1.00], 3.0411556983908046)
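For reference, a minimal usage sketch of the changed behaviour (not part of the
patch; it assumes the GROMACS benzene Coulomb dataset shipped with alchemtest
and a temperature of 300 K):

    import alchemlyb
    from alchemlyb.estimators import MBAR
    from alchemlyb.convergence import forward_backward_convergence
    from alchemlyb.parsing.gmx import extract_u_nk
    from alchemtest.gmx import load_benzene

    # Load the benzene Coulomb u_nk data (assumed example dataset).
    u_nk_list = [extract_u_nk(xvg, T=300) for xvg in load_benzene().data["Coulomb"]]
    u_nk = alchemlyb.concat(u_nk_list)

    # New default: an initial BAR calculation seeds the MBAR solver.
    mbar = MBAR().fit(u_nk)  # same as MBAR(initial_f_k="BAR")

    # Previous behaviour: start the solver from all-zero free energies.
    mbar_zeros = MBAR(initial_f_k=None).fit(u_nk)

    # forward_backward_convergence now reuses each MBAR result as the
    # initial guess for the next subsample when estimator="MBAR".
    df = forward_backward_convergence(u_nk_list, estimator="MBAR", num=10)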