From eed8ab8abc07fa1869ceab35d5c0aa9edf4b5e56 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 28 Feb 2021 20:57:37 +0000 Subject: [PATCH 001/123] load_data --- src/alchemlyb/workflows/__init__.py | 0 src/alchemlyb/workflows/abfe.py | 25 +++++++++++++++++ src/alchemlyb/workflows/base.py | 43 +++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 src/alchemlyb/workflows/__init__.py create mode 100644 src/alchemlyb/workflows/abfe.py create mode 100644 src/alchemlyb/workflows/base.py diff --git a/src/alchemlyb/workflows/__init__.py b/src/alchemlyb/workflows/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py new file mode 100644 index 00000000..d841af07 --- /dev/null +++ b/src/alchemlyb/workflows/abfe.py @@ -0,0 +1,25 @@ +import pandas as pd +import logging + +from base import WorkflowBase +from ..parsing import gmx + +class ABFE(WorkflowBase): + def load_data(self, software='Gromacs', dir='./', prefix='dhdl', + suffix='xvg', T=298): + xvg_list = super().load_data(dir=dir, prefix=prefix, suffix=suffix) + if software == 'Gromacs': + self.logger.info('Using {} parser to read the data.'.format( + software)) + try: + u_nk_list = pd.concat( + [gmx.extract_u_nk(xvg, T=T) for xvg in xvg_list]) + except: + self.logger.warning('Could not read u_nk data.') + try: + dHdl_list = pd.concat( + [gmx.extract_dHdl(xvg, T=T) for xvg in xvg_list]) + except: + self.logger.warning('Could not read dHdl data.') + + diff --git a/src/alchemlyb/workflows/base.py b/src/alchemlyb/workflows/base.py new file mode 100644 index 00000000..99d72832 --- /dev/null +++ b/src/alchemlyb/workflows/base.py @@ -0,0 +1,43 @@ +import os +import logging + +class WorkflowBase(): + '''Base class for workflow creation. 
+ ''' + def __init__(self, **kwargs): + self.logger = logging.getLogger('Initialise Alchemlyb Workflow') + self.load_data(**kwargs) + self.sub_sampling(**kwargs) + self.sub_sampling(**kwargs) + self.compute(**kwargs) + self.plot(**kwargs) + self.write(**kwargs) + + def load_data(self, software='Gromacs', dir='./', prefix='dhdl', + suffix='xvg', T=298): + self.logger.info('Finding files with prefix: {}, suffix: {} under ' + 'directory {} produced by {}'.format(prefix, suffix, + dir, software)) + xvg_list = [] + file_list = os.listdir(dir) + for file in file_list: + if file[:len(prefix)] == prefix and file[-len(prefix):] == suffix: + xvg_list.append(os.path.join(dir, file)) + + self.logger.info('Found {} files.'.format(len(xvg_list))) + self.logger.debug('File list: \n {}'.format('\n'.join(xvg_list))) + return xvg_list + + def sub_sampling(self): + pass + + def compute(self): + pass + + def plot(self): + pass + + def write(self): + pass + + From 4f96b5850c84301bc70570aac53e3618f0f8748a Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 7 Mar 2021 17:36:26 +0000 Subject: [PATCH 002/123] update --- src/alchemlyb/parsing/gmx.py | 2 + src/alchemlyb/workflows/abfe.py | 200 +++++++++++++++++++++++++++++--- src/alchemlyb/workflows/base.py | 16 +-- 3 files changed, 191 insertions(+), 27 deletions(-) diff --git a/src/alchemlyb/parsing/gmx.py b/src/alchemlyb/parsing/gmx.py index abc35168..cc5a23ee 100644 --- a/src/alchemlyb/parsing/gmx.py +++ b/src/alchemlyb/parsing/gmx.py @@ -104,6 +104,7 @@ def extract_u_nk(xvg, T): u_k = u_k.reset_index().set_index(newind) u_k.name = 'u_nk' + u_k.attrs['state'] = state return u_k @@ -181,6 +182,7 @@ def extract_dHdl(xvg, T): dHdl= dHdl.reset_index().set_index(newind) dHdl.name='dH/dl' + dHdl.attrs['state'] = state return dHdl diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index d841af07..5736eb57 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -1,25 +1,199 @@ +import os 
import pandas as pd import logging -from base import WorkflowBase -from ..parsing import gmx +from ..parsing import gmx, amber, namd, gomc +from ..preprocessing.subsampling import statistical_inefficiency +from ..estimators import MBAR, BAR, TI -class ABFE(WorkflowBase): - def load_data(self, software='Gromacs', dir='./', prefix='dhdl', - suffix='xvg', T=298): - xvg_list = super().load_data(dir=dir, prefix=prefix, suffix=suffix) - if software == 'Gromacs': +class ABFE(): + def __init__(self, software='Gromacs', dir='./', prefix='dhdl', + suffix='xvg', T=298, skiptime=None, uncorr=None, + threshold=50, estimator=None, out='./', forwrev=0): + self.logger = logging.getLogger('alchemlyb.workflows.ABFE') + self.logger.info('Initialise Alchemlyb ABFE Workflow') + self.logger.info('Finding files with prefix: {}, suffix: {} under ' + 'directory {} produced by {}'.format(prefix, suffix, + dir, software)) + self.file_list = [] + file_list = os.listdir(dir) + for file in file_list: + if file[:len(prefix)] == prefix and file[-len(suffix):] == suffix: + self.file_list.append(os.path.join(dir, file)) + + self.logger.info('Found {} xvg files.'.format(len(self.file_list))) + self.logger.debug('File list: \n {}'.format('\n'.join(self.file_list))) + + if software.lower() == 'gromacs': + self.logger.info('Using {} parser to read the data.'.format( + software)) + extract_u_nk = gmx.extract_u_nk + extract_dHdl = gmx.extract_dHdl + elif software.lower() == 'amber': + self.logger.info('Using {} parser to read the data.'.format( + software)) + extract_u_nk = amber.extract_u_nk + extract_dHdl = amber.extract_dHdl + elif software.lower() == 'namd': + self.logger.info('Using {} parser to read the data.'.format( + software)) + extract_u_nk = namd.extract_u_nk + self.logger.warning('No dHdl reader available for NAMD.') + elif software.lower() == 'gomc': self.logger.info('Using {} parser to read the data.'.format( software)) + extract_u_nk = gomc.extract_u_nk + extract_dHdl = gomc.extract_dHdl 
+ else: + raise NameError('{} parser not found.'.format(software)) + + self.u_nk_list = [] + self.dHdl_list = [] + for xvg in self.file_list: try: - u_nk_list = pd.concat( - [gmx.extract_u_nk(xvg, T=T) for xvg in xvg_list]) + u_nk = extract_u_nk(xvg, T=T) + self.logger.debug( + 'Reading {} lines of u_nk from {}'.format(len(u_nk), xvg)) + self.u_nk_list.append(u_nk) except: - self.logger.warning('Could not read u_nk data.') + self.logger.warning( + 'Error reading read u_nk from {}.'.format(xvg)) + try: - dHdl_list = pd.concat( - [gmx.extract_dHdl(xvg, T=T) for xvg in xvg_list]) + dhdl = extract_dHdl(xvg, T=T) + self.logger.debug( + 'Reading {} lines of dhdl from {}'.format(len(dhdl), xvg)) + self.dHdl_list.append(dhdl) except: - self.logger.warning('Could not read dHdl data.') + self.logger.warning( + 'Error reading read dhdl from {}.'.format(xvg)) + + # Sort the files according to the state + self.u_nk_list.sort(key=lambda x: x.attrs['state']) + self.dHdl_list.sort(key=lambda x: x.attrs['state']) + + if skiptime is not None and uncorr is not None: + self.preprocess(skiptime=skiptime, uncorr=uncorr, + threshold=threshold) + if estimator is not None: + self.estimate(estimator, out=out) + + + + def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): + self.logger.info('Start preprocessing with skiptime of {} ' + 'uncorrelation method of {} and ' + 'threshold of {}'.format(skiptime, uncorr, threshold)) + if len(self.u_nk_list) > 0: + self.logger.info( + 'Processing the u_nk data set with skiptime of {}.'.format( + skiptime)) + + self.u_nk_sample_list = [] + for index, u_nk in enumerate(self.u_nk_list): + # Get rid of the skiptime + u_nk = u_nk[u_nk.index.get_level_values('time')>skiptime] + if uncorr == 'dhdl': + # Find the current column index + # Select the first row and remove the first column (Time) + key = u_nk.index.values[0][1:] + col = u_nk[key] + subsample = statistical_inefficiency(u_nk, u_nk[key]) + elif uncorr == 'dhdl_all': + subsample = 
statistical_inefficiency(u_nk, u_nk.sum()) + elif uncorr == 'dE': + # Using the same logic as alchemical-analysis + key = u_nk.index.values[0][1:] + index = u_nk.columns.values.tolist().index(key) + # for the state that is not the last state, take the state+1 + if index + 1 < len(u_nk.columns): + subsample = statistical_inefficiency( + u_nk, u_nk.iloc[:, index + 1]) + # for the state that is the last state, take the state-1 + else: + subsample = statistical_inefficiency( + u_nk, u_nk.iloc[:, index - 1]) + else: + raise NameError( + 'Decorrelation method {} not found.'.format(uncorr)) + + if len(subsample) < threshold: + self.logger.warning('Number of u_nk {} for state {} is ' + 'less than the threshold {}.'.format( + len(subsample), index, threshold)) + self.logger.info('Take all the u_nk for state {}.'.format(index)) + self.u_nk_sample_list.append(subsample) + else: + self.logger.info('Take {} uncorrelated u_nk for state ' + '{}.'.format(len(subsample), index)) + self.u_nk_sample_list.append(u_nk) + + self.dHdl_sample_list = [] + for index, dHdl in enumerate(self.dHdl_list): + dHdl = dHdl[dHdl.index.get_level_values('time') > skiptime] + subsample = statistical_inefficiency(dHdl, dHdl) + if len(subsample) < threshold: + self.logger.warning('Number of dHdl {} for state {} is ' + 'less than the threshold {}.'.format( + len(subsample), index, threshold)) + self.logger.info('Take all the dHdl for state {}.'.format(index)) + self.dHdl_sample_list.append(subsample) + else: + self.logger.info('Take {} uncorrelated dHdl for state ' + '{}.'.format(len(subsample), index)) + self.dHdl_sample_list.append(dHdl) + + def estimate(self, estimators=('mbar', 'bar', 'ti'), out='./'): + self.logger.info( + 'Start running estimator: {}.'.format(','.join(estimators))) + self.estimator = {} + # Use unprocessed data if preprocess is not performed. 
+ try: + dHdl = pd.concat(self.dHdl_sample_list) + except AttributeError: + dHdl = pd.concat(self.dHdl_list) + self.logger.warning('dHdl has not been preprocessed.') + self.logger.info( + 'A total {} lines of dHdl is used.'.format(len(dHdl))) + + try: + u_nk = pd.concat(self.u_nk_sample_list) + except AttributeError: + u_nk = pd.concat(self.u_nk_list) + self.logger.warning('u_nk has not been preprocessed.') + self.logger.info( + 'A total {} lines of u_nk is used.'.format(len(u_nk))) + + for estimator in estimators: + if estimator.lower() == 'mbar' and len(u_nk) > 0: + self.logger.info('Run MBAR estimator.') + self.estimator['mbar'] = MBAR().fit(u_nk) + elif estimator.lower() == 'bar' and len(u_nk) > 0: + self.logger.info('Run BAR estimator.') + self.estimator['bar'] = BAR().fit(u_nk) + elif estimator.lower() == 'ti' and len(dHdl) > 0: + self.logger.info('Run TI estimator.') + self.estimator['ti'] = TI().fit(dHdl) + elif estimator.lower() == 'mbar' or estimator.lower() == 'bar': + self.logger.warning('MBAR or BAR estimator require u_nk') + else: + self.logger.warning( + '{} is not a valid estimator.'.format(estimator)) + + + + + + + + + + + + + + + + diff --git a/src/alchemlyb/workflows/base.py b/src/alchemlyb/workflows/base.py index 99d72832..9c9857e7 100644 --- a/src/alchemlyb/workflows/base.py +++ b/src/alchemlyb/workflows/base.py @@ -5,7 +5,7 @@ class WorkflowBase(): '''Base class for workflow creation. 
''' def __init__(self, **kwargs): - self.logger = logging.getLogger('Initialise Alchemlyb Workflow') + self.load_data(**kwargs) self.sub_sampling(**kwargs) self.sub_sampling(**kwargs) @@ -13,19 +13,7 @@ def __init__(self, **kwargs): self.plot(**kwargs) self.write(**kwargs) - def load_data(self, software='Gromacs', dir='./', prefix='dhdl', - suffix='xvg', T=298): - self.logger.info('Finding files with prefix: {}, suffix: {} under ' - 'directory {} produced by {}'.format(prefix, suffix, - dir, software)) - xvg_list = [] - file_list = os.listdir(dir) - for file in file_list: - if file[:len(prefix)] == prefix and file[-len(prefix):] == suffix: - xvg_list.append(os.path.join(dir, file)) - - self.logger.info('Found {} files.'.format(len(xvg_list))) - self.logger.debug('File list: \n {}'.format('\n'.join(xvg_list))) + def load_data(self, return xvg_list def sub_sampling(self): From f05c5dd69f647639d8a9329c15d7fbd4c92ac4aa Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 8 Mar 2021 10:38:53 +0000 Subject: [PATCH 003/123] update --- src/alchemlyb/preprocessing/subsampling.py | 7 +++++-- src/alchemlyb/workflows/abfe.py | 10 ++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/alchemlyb/preprocessing/subsampling.py b/src/alchemlyb/preprocessing/subsampling.py index 0b1a91a1..e7fe88b8 100644 --- a/src/alchemlyb/preprocessing/subsampling.py +++ b/src/alchemlyb/preprocessing/subsampling.py @@ -2,14 +2,17 @@ """ import numpy as np +import pandas as pd from pymbar.timeseries import (statisticalInefficiency, detectEquilibration, subsampleCorrelatedData, ) def _check_multiple_times(df): - return df.sort_index(0).reset_index(0).duplicated('time').any() - + if isinstance(df, pd.Series): + return df.sort_index(0).reset_index('time', name='').duplicated('time').any() + else: + return df.sort_index(0).reset_index('time').duplicated('time').any() def _check_sorted(df): return df.reset_index(0)['time'].is_monotonic_increasing diff --git 
a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 5736eb57..aef08ae7 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -9,7 +9,8 @@ class ABFE(): def __init__(self, software='Gromacs', dir='./', prefix='dhdl', suffix='xvg', T=298, skiptime=None, uncorr=None, - threshold=50, estimator=None, out='./', forwrev=0): + threshold=50, estimator=None, out='./', forwrev=0, log='result.log'): + logging.basicConfig(filename=log, level=logging.DEBUG) self.logger = logging.getLogger('alchemlyb.workflows.ABFE') self.logger.info('Initialise Alchemlyb ABFE Workflow') self.logger.info('Finding files with prefix: {}, suffix: {} under ' @@ -91,7 +92,8 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): self.u_nk_sample_list = [] for index, u_nk in enumerate(self.u_nk_list): - # Get rid of the skiptime + # Find the starting frame + u_nk = u_nk[u_nk.index.get_level_values('time')>skiptime] if uncorr == 'dhdl': # Find the current column index @@ -100,7 +102,7 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): col = u_nk[key] subsample = statistical_inefficiency(u_nk, u_nk[key]) elif uncorr == 'dhdl_all': - subsample = statistical_inefficiency(u_nk, u_nk.sum()) + subsample = statistical_inefficiency(u_nk, u_nk.sum(axis=1)) elif uncorr == 'dE': # Using the same logic as alchemical-analysis key = u_nk.index.values[0][1:] @@ -131,7 +133,7 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): self.dHdl_sample_list = [] for index, dHdl in enumerate(self.dHdl_list): dHdl = dHdl[dHdl.index.get_level_values('time') > skiptime] - subsample = statistical_inefficiency(dHdl, dHdl) + subsample = statistical_inefficiency(dHdl, dHdl.sum(axis=1)) if len(subsample) < threshold: self.logger.warning('Number of dHdl {} for state {} is ' 'less than the threshold {}.'.format( From 36f107a009c7f0ce435a96b97f489e84cf251524 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 14 Mar 2021 20:52:08 +0000 Subject: 
[PATCH 004/123] update --- .gitignore | 1 + src/alchemlyb/estimators/ti_.py | 37 +++ src/alchemlyb/tests/test_workflow.py | 18 ++ src/alchemlyb/visualisation/__init__.py | 3 +- src/alchemlyb/visualisation/convergence.py | 48 +++ src/alchemlyb/visualisation/dF_state.py | 10 +- src/alchemlyb/visualisation/ti_dhdl.py | 45 ++- src/alchemlyb/workflows/abfe.py | 350 ++++++++++++++++++--- 8 files changed, 452 insertions(+), 60 deletions(-) create mode 100644 src/alchemlyb/tests/test_workflow.py create mode 100644 src/alchemlyb/visualisation/convergence.py diff --git a/.gitignore b/.gitignore index f71e81e2..7657ab12 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ .vscode *.DS_Store build +.idea diff --git a/src/alchemlyb/estimators/ti_.py b/src/alchemlyb/estimators/ti_.py index 383341c9..171102e4 100644 --- a/src/alchemlyb/estimators/ti_.py +++ b/src/alchemlyb/estimators/ti_.py @@ -105,3 +105,40 @@ def fit(self, dHdl): self.states_ = means.index.values.tolist() return self + + def separate_dhdl(self): + """ + For transitions with multiple lambda, this function will separate the + dhdl with multiple columns into a list of Dataframe with a single column + (single lambda). + + Returns + ---------- + dHdl_list : list + A list of DataFrame such that dHdl_list[k][n] is the potential + energy gradient with respect to lambda for each configuration n and + lambda k. 
+ + """ + if len(self.dhdl.index.names) == 1: + # If only one column is present + return [self.dhdl, ] + else: + dhdl_list = [] + # get the lambda names + l_types = self.dhdl.index.names + # obtain bool of changed lambdas between each state + lambdas = self.dhdl.reset_index()[l_types] + diff = lambdas.diff().to_numpy(dtype='bool') + # diff will give the first row as NaN so need to fix that + diff[0, :] = diff[1, :] + # Make sure that the start point is set to true as well + diff[:-1, :] = diff[:-1, :] | diff[1:, :] + for i in range(len(l_types)): + new = self.dhdl.iloc[diff[:,i], i] + # drop all other index + for l in l_types: + if l != l_types[i]: + new = new.reset_index(l, drop=True) + dhdl_list.append(new) + return dhdl_list diff --git a/src/alchemlyb/tests/test_workflow.py b/src/alchemlyb/tests/test_workflow.py new file mode 100644 index 00000000..2d441440 --- /dev/null +++ b/src/alchemlyb/tests/test_workflow.py @@ -0,0 +1,18 @@ +import pandas as pd +import numpy as np +import pytest +import os + +from alchemlyb.workflows.abfe import ABFE +from alchemtest.gmx import load_ABFE, load_expanded_ensemble_case_1 + +data = load_ABFE() +dir = os.path.dirname(data['data']['complex'][0]) +workflow = ABFE(dir=dir, T=310) +workflow.preprocess() +workflow.estimate() +workflow.write() +workflow.plot_overlap_matrix() +workflow.plot_ti_dhdl() +workflow.plot_dF_state() +workflow.check_convergence(10) \ No newline at end of file diff --git a/src/alchemlyb/visualisation/__init__.py b/src/alchemlyb/visualisation/__init__.py index b7cf63cc..d58b367e 100644 --- a/src/alchemlyb/visualisation/__init__.py +++ b/src/alchemlyb/visualisation/__init__.py @@ -1,3 +1,4 @@ from .mbar_matrix import plot_mbar_overlap_matrix from .ti_dhdl import plot_ti_dhdl -from .dF_state import plot_dF_state \ No newline at end of file +from .dF_state import plot_dF_state +from .convergence import plot_convergence \ No newline at end of file diff --git a/src/alchemlyb/visualisation/convergence.py 
b/src/alchemlyb/visualisation/convergence.py new file mode 100644 index 00000000..4197e0dd --- /dev/null +++ b/src/alchemlyb/visualisation/convergence.py @@ -0,0 +1,48 @@ +import matplotlib.pyplot as plt +from matplotlib.font_manager import FontProperties as FP +import numpy as np + +def plot_convergence(forward, forward_error, backward, backward_error, + units='kBT', ax=None): + """Plots the free energy change computed using the equilibrated snapshots between the proper target time frames (f_ts and r_ts) + in both forward (data points are stored in F_df and F_ddf) and reverse (data points are stored in R_df and R_ddf) directions.""" + if ax is None: + fig, ax = plt.subplots(figsize=(8, 6)) + + plt.setp(ax.spines['bottom'], color='#D2B9D3', lw=3, zorder=-2) + plt.setp(ax.spines['left'], color='#D2B9D3', lw=3, zorder=-2) + for dire in ['top', 'right']: + ax.spines[dire].set_color('none') + ax.xaxis.set_ticks_position('bottom') + ax.yaxis.set_ticks_position('left') + + f_ts = np.linspace(0, 1, len(forward) + 1)[1:] + r_ts = np.linspace(0, 1, len(backward) + 1)[1:] + + line0 = ax.fill_between([0, 1], backward[-1] - backward_error[-1], + backward[-1] + backward_error[-1], color='#D2B9D3', + zorder=1) + line1 = ax.errorbar(f_ts, forward, yerr=forward_error, color='#736AFF', + lw=3, solid_capstyle='round', zorder=2, marker='o', + mfc='w', mew=2.5, mec='#736AFF', ms=12,) + line2 = ax.errorbar(r_ts, backward, yerr=backward_error, color='#C11B17', + lw=3, solid_capstyle='round', zorder=3, marker='o', + mfc='w', mew=2.5, mec='#C11B17', ms=12, ) + + # ax.set_xlim(0,0.5) + + plt.xticks(r_ts[::2], fontsize=10) + plt.yticks(fontsize=10) + + leg = plt.legend((line1[0], line2[0]), (r'$Forward$', r'$Reverse$'), loc=9, + prop=FP(size=18), frameon=False) + plt.xlabel(r'$\mathrm{Fraction\/of\/the\/simulation\/time}$', fontsize=16, + color='#151B54') + plt.ylabel(r'$\mathrm{\Delta G\/%s}$' % units, fontsize=16, + color='#151B54') + plt.xticks(f_ts, ['%.2f' % i for i in f_ts]) + 
plt.tick_params(axis='x', color='#D2B9D3') + plt.tick_params(axis='y', color='#D2B9D3') + return ax + + diff --git a/src/alchemlyb/visualisation/dF_state.py b/src/alchemlyb/visualisation/dF_state.py index 569df9b7..4366c94e 100644 --- a/src/alchemlyb/visualisation/dF_state.py +++ b/src/alchemlyb/visualisation/dF_state.py @@ -14,8 +14,8 @@ from ..estimators import TI, BAR, MBAR -def plot_dF_state(estimators, labels=None, colors=None, units='kcal/mol', - orientation='portrait', nb=10): +def plot_dF_state(estimators, labels=None, colors=None, units='kBT', + scaling_factor=1, orientation='portrait', nb=10): '''Plot the dhdl of TI. Parameters @@ -31,6 +31,8 @@ def plot_dF_state(estimators, labels=None, colors=None, units='kcal/mol', list of colors for plotting different estimators. units : str The unit of the estimate. Default: 'kcal/mol' + scaling_factor : float + The scaling factor to change the energy from kBT to the desired unit. orientation : string The orientation of the figure. Can be `portrait` or `landscape` nb : int @@ -46,6 +48,10 @@ def plot_dF_state(estimators, labels=None, colors=None, units='kcal/mol', The code is taken and modified from : `Alchemical Analysis `_ + The units variable is for labelling only. Changing it doesn't change the + unit of the underlying variable, which is in the unit of kBT. The + scaling_factor is used to change the number to the desired unit. + ''' try: len(estimators) diff --git a/src/alchemlyb/visualisation/ti_dhdl.py b/src/alchemlyb/visualisation/ti_dhdl.py index 7b53217b..75d62e6f 100644 --- a/src/alchemlyb/visualisation/ti_dhdl.py +++ b/src/alchemlyb/visualisation/ti_dhdl.py @@ -14,7 +14,8 @@ from matplotlib.font_manager import FontProperties as FP import numpy as np -def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kcal/mol', ax=None): +def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kBT', + scaling_factor=1, ax=None): '''Plot the dhdl of TI. 
Parameters @@ -28,7 +29,9 @@ def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kcal/mol', ax=None) list of colors for plotting all the alchemical transformations. Default: ['r', 'g', '#7F38EC', '#9F000F', 'b', 'y'] units : str - The unit of the estimate. Default: 'kcal/mol' + The label for the unit of the estimate. Default: 'kBT' + scaling_factor : float + The scaling factor to change the energy from kBT to the desired unit. ax : matplotlib.axes.Axes Matplotlib axes object where the plot will be drawn on. If ax=None, a new axes will be generated. @@ -43,12 +46,21 @@ def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kcal/mol', ax=None) The code is taken and modified from : `Alchemical Analysis `_ + The units variable is for labelling only. Changing it doesn't change the + unit of the underlying variable, which is in the unit of kBT. The + scaling_factor is used to change the number to the desired unit. + ''' + # Fix unit + + # Make it into a list - try: - len(dhdl_data) - except TypeError: - dhdl_data = [dhdl_data, ] + if not isinstance(dhdl_data, list): + dhdl_list = dhdl_data.separate_dhdl() + else: + dhdl_list = [] + for dhdl in dhdl_data: + dhdl_list.extend(dhdl.separate_dhdl()) if ax is None: fig, ax = plt.subplots(figsize=(8, 6)) @@ -65,33 +77,33 @@ def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kcal/mol', ax=None) # Make level names if labels is None: lv_names2 = [] - for dhdl in dhdl_data: + for dhdl in dhdl_list: # Assume that the dhdl has only one columns - lv_names2.append(dhdl.dhdl.columns.values[0].capitalize()) + lv_names2.append(dhdl.name.capitalize()) else: - if len(labels) == len(dhdl_data): + if len(labels) == len(dhdl_list): lv_names2 = labels else: # pragma: no cover raise ValueError( 'Length of labels ({}) should be the same as the number of data ({})'.format( - len(labels), len(dhdl_data))) + len(labels), len(dhdl_list))) if colors is None: colors = ['r', 'g', '#7F38EC', '#9F000F', 'b', 'y'] else: - if 
len(colors) >= len(dhdl_data): + if len(colors) >= len(dhdl_list): pass else: # pragma: no cover raise ValueError( 'Number of colors ({}) should be larger than the number of data ({})'.format( - len(labels), len(dhdl_data))) + len(labels), len(dhdl_list))) # Get the real data out xs, ndx, dx = [0], 0, 0.001 min_y, max_y = 0, 0 - for dhdl in dhdl_data: - x = dhdl.dhdl.index.values - y = dhdl.dhdl.values.ravel() + for dhdl in dhdl_list: + x = dhdl.index.values + y = dhdl.values.ravel() * scaling_factor min_y = min(y.min(), min_y) max_y = max(y.max(), max_y) @@ -152,8 +164,7 @@ def getInd(r=ri, z=[0]): for i, j in zip(xs[1:], xt[1:]): ax.annotate( ('%.2f' % (i - 1.0 if i > 1.0 else i) if not j == '' else ''), - xy=(i, 0), xytext=(i, 0.01), size=10, rotation=90, - textcoords=('data', 'axes fraction'), va='bottom', ha='center', + xy=(i, 0), size=10, rotation=90, va='bottom', ha='center', color='#151B54') if ndx > 1: lenticks = len(ax.get_ymajorticklabels()) - 1 diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index aef08ae7..4dde2429 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -1,29 +1,42 @@ import os +from os.path import join +from glob import glob import pandas as pd +import numpy as np +import scipy import logging from ..parsing import gmx, amber, namd, gomc from ..preprocessing.subsampling import statistical_inefficiency from ..estimators import MBAR, BAR, TI +from ..visualisation import (plot_mbar_overlap_matrix, plot_ti_dhdl, + plot_dF_state, plot_convergence) + class ABFE(): - def __init__(self, software='Gromacs', dir='./', prefix='dhdl', - suffix='xvg', T=298, skiptime=None, uncorr=None, - threshold=50, estimator=None, out='./', forwrev=0, log='result.log'): - logging.basicConfig(filename=log, level=logging.DEBUG) + def __init__(self, units='kcal/mol', software='Gromacs', dir='./', + prefix='dhdl', suffix='xvg', T=298, skiptime=None, uncorr=None, + threshold=50, estimator=None, out='./', 
resultfilename=None, + overlap=None, breakdown=None, forwrev=None, + log='result.log'): + logging.basicConfig(filename=log, level=logging.INFO) self.logger = logging.getLogger('alchemlyb.workflows.ABFE') self.logger.info('Initialise Alchemlyb ABFE Workflow') + + self.logger.info('Set temperature to {} K.'.format(T)) + self.T = T + self.out = out + + self._update_units(units) + self.logger.info('Finding files with prefix: {}, suffix: {} under ' 'directory {} produced by {}'.format(prefix, suffix, dir, software)) - self.file_list = [] - file_list = os.listdir(dir) - for file in file_list: - if file[:len(prefix)] == prefix and file[-len(suffix):] == suffix: - self.file_list.append(os.path.join(dir, file)) + file_list = glob(join(dir, prefix + '*' + suffix)) - self.logger.info('Found {} xvg files.'.format(len(self.file_list))) - self.logger.debug('File list: \n {}'.format('\n'.join(self.file_list))) + self.logger.info('Found {} xvg files.'.format(len(file_list))) + self.logger.info('Unsorted file list: \n{}'.format('\n'.join( + file_list))) if software.lower() == 'gromacs': self.logger.info('Using {} parser to read the data.'.format( @@ -48,38 +61,82 @@ def __init__(self, software='Gromacs', dir='./', prefix='dhdl', else: raise NameError('{} parser not found.'.format(software)) - self.u_nk_list = [] - self.dHdl_list = [] - for xvg in self.file_list: + u_nk_list = [] + dHdl_list = [] + for xvg in file_list: try: u_nk = extract_u_nk(xvg, T=T) - self.logger.debug( + self.logger.info( 'Reading {} lines of u_nk from {}'.format(len(u_nk), xvg)) - self.u_nk_list.append(u_nk) + u_nk_list.append(u_nk) except: self.logger.warning( 'Error reading read u_nk from {}.'.format(xvg)) try: dhdl = extract_dHdl(xvg, T=T) - self.logger.debug( + self.logger.info( 'Reading {} lines of dhdl from {}'.format(len(dhdl), xvg)) - self.dHdl_list.append(dhdl) + dHdl_list.append(dhdl) except: self.logger.warning( 'Error reading read dhdl from {}.'.format(xvg)) - # Sort the files according to the 
state - self.u_nk_list.sort(key=lambda x: x.attrs['state']) - self.dHdl_list.sort(key=lambda x: x.attrs['state']) + # # Sort the files according to the state + if len(u_nk_list) > 0: + self.logger.info('Sort files according to the u_nk.') + column_names = u_nk_list[0].columns.values.tolist() + index_list = sorted(range(len(file_list)), + key=lambda x:column_names.index( + u_nk_list[x].reset_index('time').index.values[0])) + else: + self.logger.info('Sort files according to the dHdl.') + column_names = sorted([dHdl.reset_index('time').index.values[0] + for dHdl in dHdl_list]) + index_list = sorted(range(len(file_list)), + key=lambda x:column_names.index( + dHdl_list[x].reset_index('time').index.values[0])) + + self.file_list = [file_list[i] for i in index_list] + self.logger.info('Sorted file list: \n{}'.format('\n'.join( + self.file_list))) + self.u_nk_list = [u_nk_list[i] for i in index_list] + self.dHdl_list = [dHdl_list[i] for i in index_list] if skiptime is not None and uncorr is not None: self.preprocess(skiptime=skiptime, uncorr=uncorr, threshold=threshold) if estimator is not None: - self.estimate(estimator, out=out) - - + self.estimate(estimator) + + if resultfilename is not None: + self.write(estimator, resultfilename=resultfilename, units=units) + + if overlap is not None: + self.plot_overlap_matrix(overlap) + + if breakdown: + self.plot_ti_dhdl() + self.plot_dF_state() + self.plot_dF_state(dF_state='dF_state_long.pdf', + orientation='landscape') + + + def _update_units(self, units): + if units is not None: + self.logger.info('Set unit to {}.'.format(units)) + if units == 'kBT': + self.scaling_factor = 1 + elif units == 'kJ/mol': + self.scaling_factor = scipy.constants.k * self.T * scipy.constants.N_A / \ + 1000 + elif units == 'kcal/mol': + kJ2kcal = 0.239006 + self.scaling_factor = scipy.constants.k * self.T * scipy.constants.N_A / \ + 1000 * kJ2kcal + else: + raise NameError('{} is not a valid unit.'.format(units)) + self.units = units def 
preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): self.logger.info('Start preprocessing with skiptime of {} ' @@ -145,7 +202,11 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): '{}.'.format(len(subsample), index)) self.dHdl_sample_list.append(dHdl) - def estimate(self, estimators=('mbar', 'bar', 'ti'), out='./'): + def estimate(self, estimators=('mbar', 'bar', 'ti')): + # Make estimators into a tuple + if isinstance(estimators, str): + estimators = (estimators, ) + self.logger.info( 'Start running estimator: {}.'.format(','.join(estimators))) self.estimator = {} @@ -182,20 +243,229 @@ def estimate(self, estimators=('mbar', 'bar', 'ti'), out='./'): self.logger.warning( '{} is not a valid estimator.'.format(estimator)) + def write(self, resultfilename='result.out', units=None): + self._update_units(units) + + # Write estimate + self.logger.info('Write the estimate as txt file to {} under {} ' + 'with unit {}.'.format( + resultfilename, self.out, self.units)) + # Make the header name + self.logger.info('Write the header names.') + result_out = [['------------', ], + [' States ', ], + ['------------', ],] + eitimator_names = list(self.estimator.keys()) + num_states = len(self.estimator[eitimator_names[0]].states_) + for i in range(num_states - 1): + result_out.append([str(i).rjust(4) + ' -- ' + str(i+1).ljust(4), ]) + result_out.append(['------------', ]) + try: + u_nk = self.u_nk_list[0] + stages = u_nk.reset_index('time').index.names + self.logger.info('use the stage name from u_nk') + except: + try: + dHdl = self.dHdl_list[0] + stages = dHdl.reset_index('time').index.names + self.logger.info('use the stage name from dHdl') + except: + stages = [] + self.logger.warning('No stage name found in dHdl or u_nk') + for stage in stages: + result_out.append([stage.split('-')[0][:9].rjust(9)+': ', ]) + result_out.append(['TOTAL'.rjust(9) + ': ', ]) + + for estimator_name, estimator in self.estimator.items(): + self.logger.info('write the result from 
estimator {}'.format( + estimator_name)) + # Write the estimator header + result_out[0].append('---------------------') + result_out[1].append('{} ({}) '.format( + estimator_name.upper(), self.units).rjust(21)) + result_out[2].append('---------------------') + for index in range(1, num_states): + result_out[2+index].append('{:.3f} +- {:.3f}'.format( + estimator.delta_f_.iloc[index-1, index]*self.scaling_factor, + estimator.d_delta_f_.iloc[index-1, index]*self.scaling_factor + ).rjust(21)) + + result_out[2+num_states].append('---------------------') + + self.logger.info('write the staged result from estimator {}'.format( + estimator_name)) + for index, stage in enumerate(stages): + start = list(reversed( + [state[index] for state in estimator.states_])).index(0) + start = num_states - start - 1 + end = [state[index] for state in estimator.states_].index(1) + self.logger.info( + 'Stage {} is from state {} to state {}.'.format( + stage, start, end)) + result = estimator.delta_f_.iloc[start, end]*self.scaling_factor + if estimator_name != 'bar': + error = estimator.d_delta_f_.iloc[start, end]*self.scaling_factor + else: + error = np.sqrt(sum( + [estimator.d_delta_f_.iloc[start, start+1]**2 + for i in range(start, end + 1)])) * self.scaling_factor + result_out[3 + num_states + index].append( + '{:.3f} +- {:.3f}'.format(result, error,).rjust(21)) + + # Total result + result = estimator.delta_f_.iloc[0, -1] * self.scaling_factor + if estimator_name != 'bar': + error = estimator.d_delta_f_.iloc[0, -1] * self.scaling_factor + else: + error = np.sqrt(sum( + [estimator.d_delta_f_.iloc[i, i + 1] ** 2 + for i in range(num_states - 1)])) * self.scaling_factor + result_out[3 + num_states + len(stages)].append( + '{:.3f} +- {:.3f}'.format(result, error, ).rjust(21)) + self.logger.info('Write results:\n'+ + '\n'.join([' '.join(line) for line in result_out])) + with open(join(self.out, resultfilename), 'w') as f: + f.write('\n'.join([' '.join(line) for line in result_out])) + + def 
plot_overlap_matrix(self, overlap='O_MBAR.pdf', ax=None): + self.logger.info('Plot overlap matrix.') + if 'mbar' in self.estimator: + ax = plot_mbar_overlap_matrix(self.estimator['mbar'].overlap_matrix, + ax=ax) + ax.figure.savefig(join(self.out, overlap)) + self.logger.info('Plot overlap matrix to {} under {}.' + ''.format(self.out, overlap)) + return ax + else: + self.logger.warning('MBAR estimator not found. ' + 'Overlap matrix not plotted.') + + def plot_ti_dhdl(self, dhdl_TI='dhdl_TI.pdf', units=None, labels=None, + colors=None, ax=None): + self._update_units(units) + self.logger.info('Plot TI dHdl.') + if 'ti' in self.estimator: + ax = plot_ti_dhdl(self.estimator['ti'], units=self.units, + labels=labels, colors=colors, ax=ax, + scaling_factor=self.scaling_factor) + ax.figure.savefig(join(self.out, dhdl_TI)) + self.logger.info('Plot TI dHdl to {} under {}.' + ''.format(dhdl_TI, self.out)) + + def plot_dF_state(self, dF_state='dF_state.pdf', labels=None, colors=None, + units=None, orientation='portrait', nb=10): + self._update_units(units) + self.logger.info('Plot dF states.') + fig = plot_dF_state(self.estimator.values(), labels=labels, colors=colors, + units=self.units, + scaling_factor=self.scaling_factor, + orientation=orientation, nb=nb) + fig.savefig(join(self.out, dF_state)) + self.logger.info('Plot dF state to {} under {}.' 
+ ''.format(dF_state, self.out)) + + def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', + units=None): + self._update_units(units) + self.logger.info('Start convergence analysis.') + self.logger.info('Check data availability.') + try: + dHdl_list = self.dHdl_sample_list + self.logger.info('Subsampled dHdl is available.') + except AttributeError: + try: + dHdl_list = self.dHdl_list + self.logger.info('Subsampled dHdl not available, ' + 'use original data instead.') + except AttributeError: + self.logger.warning('dHdl is not available.') - - - - - - - - - - - - - - - + try: + u_nk_list = self.u_nk_sample_list + self.logger.info('Subsampled u_nk is available.') + except AttributeError: + try: + u_nk_list = self.u_nk_list + self.logger.info('Subsampled u_nk not available, ' + 'use original data instead.') + except AttributeError: + self.logger.warning('u_nk is not available.') + + if estimator.lower() == 'mbar': + self.logger.info('Use MBAR estimator for convergence analysis.') + estimator_fit = MBAR().fit + elif estimator.lower() == 'bar': + self.logger.info('Use BAR estimator for convergence analysis.') + estimator_fit = BAR().fit + elif estimator.lower() == 'ti': + self.logger.info('Use TI estimator for convergence analysis.') + estimator_fit = TI().fit + else: + self.logger.warning( + '{} is not a valid estimator.'.format(estimator)) + + self.logger.info('Begin forward analysis') + forward_list = [] + forward_error_list = [] + for i in range(1, forwrev + 1): + self.logger.info('Forward analysis: {:.2f}%'.format(i / forwrev)) + sample = [] + if estimator.lower() in ['mbar', 'bar']: + for data in u_nk_list: + sample.append(data[:len(data) // forwrev * i]) + elif estimator.lower() == 'ti': + for data in dHdl_list: + sample.append(data[:len(data) // forwrev * i]) + sample = pd.concat(sample) + result = estimator_fit(sample) + forward_list.append(result.delta_f_.iloc[0, -1]) + if estimator.lower() == 'bar': + error = np.sqrt(sum( + 
[result.d_delta_f_.iloc[i, i + 1] ** 2 + for i in range(len(result.d_delta_f_) - 1)])) + forward_error_list.append(error) + else: + forward_error_list.append(result.d_delta_f_.iloc[0, -1]) + self.logger.info('{:.2f} +/- {:.2f} kBT'.format(forward_list[-1], + forward_error_list[-1])) + + self.logger.info('Begin backward analysis') + backward_list = [] + backward_error_list = [] + for i in range(1, forwrev + 1): + self.logger.info('Backward analysis: {:.2f}%'.format(i / forwrev)) + sample = [] + if estimator.lower() in ['mbar', 'bar']: + for data in u_nk_list: + sample.append(data[-len(data) // forwrev * i:]) + elif estimator.lower() == 'ti': + for data in dHdl_list: + sample.append(data[-len(data) // forwrev * i:]) + sample = pd.concat(sample) + result = estimator_fit(sample) + backward_list.append(result.delta_f_.iloc[0, -1]) + if estimator.lower() == 'bar': + error = np.sqrt(sum( + [result.d_delta_f_.iloc[i, i + 1] ** 2 + for i in range(len(result.d_delta_f_) - 1)])) + backward_error_list.append(error) + else: + backward_error_list.append(result.d_delta_f_.iloc[0, -1]) + self.logger.info('{:.2f} +/- {:.2f} kBT'.format(backward_list[-1], + backward_error_list[-1])) + + convergence = pd.DataFrame({'Forward (kBT)': forward_list, + 'F. Error (kBT)': forward_error_list, + 'Backward (kBT)': backward_list, + 'B. Error (kBT)': backward_error_list}) + + self.convergence = convergence + self.logger.info('Plot convergence analysis to {} under {}.' 
+ ''.format(dF_t, self.out)) + ax = plot_convergence(np.array(forward_list) * self.scaling_factor, + np.array(forward_error_list) * self.scaling_factor, + np.array(backward_list) * self.scaling_factor, + np.array(backward_error_list) * self.scaling_factor, + units=self.units) + ax.figure.savefig(join(self.out, dF_t)) From 70d56bdda05d2de0eaa785339ddd1685ea8b31bc Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 21 Mar 2021 19:12:14 +0000 Subject: [PATCH 005/123] add doc --- .gitignore | 2 + docs/images/dF_t.png | Bin 0 -> 44948 bytes docs/index.rst | 1 + docs/visualisation.rst | 48 ++++ ...chemlyb.visualisation.plot_convergence.rst | 19 ++ .../alchemlyb.visualisation.plot_dF_state.rst | 2 +- .../alchemlyb.visualisation.plot_ti_dhdl.rst | 2 +- docs/workflow.rst | 109 +++++++ src/alchemlyb/__init__.py | 1 - src/alchemlyb/preprocessing/subsampling.py | 50 +++- src/alchemlyb/tests/test_workflow.py | 36 ++- src/alchemlyb/visualisation/convergence.py | 30 +- src/alchemlyb/visualisation/dF_state.py | 4 +- src/alchemlyb/visualisation/mbar_matrix.py | 4 +- src/alchemlyb/visualisation/ti_dhdl.py | 4 +- src/alchemlyb/workflows/__init__.py | 1 + src/alchemlyb/workflows/abfe.py | 270 ++++++++++++++++-- 17 files changed, 533 insertions(+), 50 deletions(-) create mode 100644 docs/images/dF_t.png create mode 100644 docs/visualisation/alchemlyb.visualisation.plot_convergence.rst create mode 100644 docs/workflow.rst diff --git a/.gitignore b/.gitignore index 7657ab12..b2bbd452 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ *.DS_Store build .idea +docs/_build/ +src/alchemlyb.egg-info/ diff --git a/docs/images/dF_t.png b/docs/images/dF_t.png new file mode 100644 index 0000000000000000000000000000000000000000..625e9a5a91dca985c6b1e88cafabb7d951d65dbb GIT binary patch literal 44948 zcmeFZWl&sg)GgRJ!6mr6TL>E5LxP6jfe1=H8U}j^@&CbpKl9lqaqob{Z zAP0xl|M3EL8+%g@EVY53;7y*{%4s=3AeaWwzc7WO1!fRP1*3# zTH}d&(JVK^lF)!kLk<4;PpJLRACGQev-RS?ym1RTn$uC^Js&4xZ1I{cl!jg=d?nI< 
zY^@Msu#tGx^bl!NVZ%@$wZzIr5KIorI*#T2zmsJ-k?Z{&ZCx}`LVnB3A9M5aYMmTH zLbShv{TPF;`x*vgA3>1}sYd(kpzR)VPKJy|@+=@|uFb0q8wba)M5BUN`zXmY{Z}xB z_vbsr168Bad^IzA@$uyNb>T&0#S8OlLqB`%6EMY5Gke18u_UAS?Um#MJ zmzRfp{c5>3InO_o_9{3GtRBckg==ffno?5gwsg`$`|?HbnZ;!_e<#3Z1rMT@TFSSh-*Dj!S(fgv^dZ3*|Y=nTHHa5f4tb8Fu9`; z@g&3vg$IAG7CkCvF8}^jZD?we1pj@s*ph7}n1<2he9*yC(d-vPBTLQ7iiUxKVe917 z?F$2UeAsl9{o}`v?t^lxr^knet#ps&qa#dNS=sZ$1(G)j?17ZX`Rx_p2V@i!^S{H% zA%!iUFhM}zkxnx_3VE=Pu;j0JWq%2We^_7Hg=v*sLRXk5MD>oOOK{2&G6qljWZcS!?p=rue(%gV=bw#@ zR9sxxK2LW`F^P%DaqY40hqNp3EsU3XJ)K1PEbBkr5`Y`E=(>h(WNMnz&_HNx zY<#^QtCXqCc)XS2vwBd!#5*_TO7palpuc*&(s_M%-+7ek}hV?E z6WO0Xf1nFd>3L(%s@pK~vU#sBj)jqt5d!&KtG&O}M&WgP&UJ8faDWcEK%{(nIP%FF z+d2OGhGXSsD}xVJjMrxI>-xHpJc}?yDy~6v45vv09K*@KeexeFEQ8QYzSF5V11#@V=DU!vuYA zB0tLIsvh#{>*|>PjfRbQowpf8zE~=zBSo;$gk_(5~t^GFm{gp`W z?b{kn{WA5=sVM@m%b0I34=k#N4i}p2mb|VErpiC?yB58{M@nE=!MYP z0QUv?D@w}t^VXj~XU`X1X^svLBN@_Nvlk!O5GX-(O1ilTko(-*@xOZ2xv)ScEiH{z zUf~a->1&xzeMfgULX_}jV)t5nLc-eFuxJmsb=btjD5j>Sqn-4Ikg-||jZtp}v#T#~ zNVXv9fcxKk{`bv=UdoB-+RDn8<715Y+HWDfeu_Ns7Psd+*~3T8XBhEpIziXhuA{gD zIhB>1p68>o5Hv$awi}Qvr7|>1HNRwd-#&Zgyx+b*Q?0OJ-FK^-Q8LT4S0BXh2`Zz2?+^>_Z@+#$Y^K*@_8N6 zRFajSN2vjp&^&0CG9vLpf>b^DhMmS!tW6s%Pi3GVA^eKoqWOIqLK6OMC?pnFu?>!p zzronb&feM^^XTp>04D&i%&ptUk7hAZ{Qlx{fR>KVpzk|VVQFbsp-K*?&!gv8 zLavUNE^wTlM_2nZxV|^Txl9MVlV+9@Pg#<|5Zh}!Y$x(GcF1cvIWe%Ai;IhwL8(P2 z;iVK5oHg(9x!Fj-e*QcFY~IAaAT%Psi4u(*)8TZmFMrS3v_%+n27wx(a^6$S-lB&o z&LLJ%+k7dEJrk^EzAf;%NLgaHP7%Z-cOl=+YIpq&nEYso}Jkp%<@1YmOQ7({4O${?vZ?5jadviWDN|ccrB-W zfBbkc@3xhOAI#Ho@|y&Dy#UfwdEGg`i;}exy+4M7w2zLee8+ArI^exbANR_9uVZ3D ze|a!BwOthwBKS>_z*kB}W_A5fEF&{>$L+-)m+25Gh}Rxw@m`SlqiQXtK3yFy*xK51 z#wn!lOqyicXf z9A_e#x?4=7`aLg)<>-!$05X zG;2O%*I6%UADPq33=Iv1L_~B4q7o37Zjr2nZ`-MC3uMcd%*jewl^p(UTJkhNW+(pk z{c|CM;1FZK(_FG0th=RU>9$D|*YA|ggSq-}`UDbCzzrr#wLpt;wdB*bO+V*KLxGDs zwtSB%M(Rt5d)so{feKPTWWDLAwX-{%fXi`P8I6Q@VD*HV87X=>E7< zPC)@d`RTai_v}JW@Jf@<+2$aAb&<_t^UQO`#)gK(AP;P8Cr1#oMQ^a+t2eMy_WUtL 
z+I4JN0@-xxUNYTPXqU;Ts($KeuU(Ly1BdwW=5!<5D}wJ-!R^!rP3*;s7nO!R7*J#wORs=4!G%b<W&t64aB#*@;C4jkFT3Qe^rJS#lsCup| z2yZh)$f&5OdjI_S1>M-+zTpfcarZ=OC`nlzTqcRVer8)Y$vnRmi^dV!RXJ{L9O&B7 zsNz_9pk85K2J&O7s1Lc=*`^QSUK;=#t~+@Nl_rD4P+$(SG9w2E--51Ex)2d`L3BTK z+GJ3|$cXlL6h)L|FoprBocXUKl9H0--n_v*7;E$KIX_yW=!qoj1mJ80 z{5tP>Hkf~q;dM#$k{JE)=N>yeJba?wZ+34;gYy9~sB|LG^bz^$&{wrc%G1-+|BWJ! z$`(y5i~~8(xzRWsa!_0yD*}b2Ea9ca?_T;QLAO)d^Qk7=WD;LUdpJjH2#D2>r|W+J zj+J`<9?#Fu&vwqL-r(W(V)}`tx4b@HnQrmm1J{Mgz`y_k_I!I3&ZhYUaVTBbA{nVrnNjLYD0$2pv~tiB->${(I<<{iBep zg;K-07=yX#@A?*1YlZ9S*2p{4%=614)2PV2YJGjv=y;U#r1ATv@C<{Qy)5R>8E*oc|FDl~#bk`3mQ-YIq zM{4S9k`D#RaR1U?2PFUhkX%r@1XPLYT|dutt3@;?89~R5jc9>al6Zvh8Ab21j!&)k zHlZXO8T9-}7zMmZiAq|f4sF^bluHKd^zXMyj?=tZU`?OUt>F-B#8GwFX=&j#D)e|* zT%4U{0O?@exPjjca^msnsk#3oHnxgK4}jRcVdoPQ4cD`)7~|w2zA`!Dpeo|v;6z79`yRF4VL%&uWo2a@ z-h_~lS*jrzu(~*2PCP>*mmwR=lFWhgW2n)|J_jaVSLon5Wbm2JpN;oOUpWt)dk?1z z_xANksH@|nprAx2Bm_U*wLQT>0N=4Pp`mY8VuFLM^6EGAT;mUeobzyx%Jzu2< z5=uBgFN#o8n?C^##7S(!Sb>G+j!D2%8iX?D&$6)D zi%)GI!WT>0{Kx$&f9e0*Nl(wdr= z&7iX(?}?=TFd4%&TTu;8=a zZ}GVQ_o)WjVxQY{Goh0%+>pq~pzv^1HDd9zb%j81aB=s9t>xuWMDF&v zA?@Gj-hd0!cGAmK0X3W(fi8JZPU}55OgHU zisn-kke5M42(nx2dO~Cq=1|7<^#ov*-}9PNyrn#JF~J93l!KMfc&^zk3W{wMuZgR#30At7W^r%A!hsL9Nx5+ z^ZVRS_m6a+wtQ?x@O5j8w6Ua8+Lym`1rgdt9|AQ_qd9dwy{hhp+qr8TbTfoE9_yYF z=d0O>f9wc&20@ej z`V|HS0tk^cD9@NJ9j*)tIJvO<^7d#B=(Tal)+*W_JHRj4SXjOQcIAM4VzMRR^VnP@ z#KVJ^m6ZkYZ3Bz|lmd~7Bn@rxxJo{7xjr^PBNKeCTJ*d?DFc5{#ca^6LbuTmG;R=I ze}6a(GQj{5uUpM~q6OzUtLX+innlkuh33cGy?CE!r@%O8!>gXbr zvA-YZJYHcvu1K6ewA&nw1cM z4iow9sGSdHjqyE^{d@tBhT+~q4$5m%H;EMl${I8^y@GV6BIo>e7kdO85n%NO(q3si z_pG^W{@?q1yOv)_ACc7|VpCn|g_CvQR?)1|I0LHkTT^2SeJWp^6utlaAlYno5Q9~H z?d+T5&Gq#}s|5Kx*Q*7W6M&bsLHT@qfHw0GXvg1a7$vNJ)ClMv7>Hsm{SJxEJu1j~ zad8OXw8!ccQg{tO`)F%t_lv})*rxBZ4h)>!9t)t`|;UX^{!h4OmB_Z*xOKoy)%Fz4SJ)g0A2b|uC34)g-1YGfnGLX ze~d3Oo?)xVc9COI|IE5sv}f_mSCn*Oai{n(^%U@(kAPTY`+{-0szu8|?9{~)>D+y6 z>)3}qU@0x0!In>EWo2fcJTaeHY* 
z2JqjUoB|~!B?MHm_DHfGlM>XpLvA6*vEL*cc>6+8*kkm$6)O;>*EFxupmCLYYYC<$A6jgg*o>57{629nhk4eBI+A}bSSx)q4O4Z3?U7} z&Bt221MX^NU>_;7qLA|d_juW|;lgH?lfr}h(pQt%`+bUCp^B%c5Fq5G=Afh(B#2}O z)LAI`E$!ykXm%IJq5)+s0n3d6x}Z8&&o$L%K#92mib_RKFS6)eLGo#==HFznm7{+L z>3dyFXaEUDROIp6P`APM*GpZ6GIhhX-w}zN2C(DiJgcj#oIqzOd3+1NATd3)B~oI@ z$ZcXs^sHa&PhwLF4|o87l5M`rqrW+be;!VsO5&!F8jy&mFF|eN^RxtLqE674fJ8^( zeVt`i9!v+QR7o|p=MZS;m``Gv1LfACtY%i*BX_hMEwmW`*HK7d06GzqqyZSQim6V( z;Q{Izl!rvcG{L~c#6-l3th}v=u&|CshpkG-9aW*5_1FvYCqQua0IJ*iwp6tUFkD|d zIwan4zA);=vYf4X_Ti^v)$TA5nsN-F@(9Qe#HpCHKBwk@1g;Pp=348{o7d#78i0n@OgXMt^XiohCpu(CP!Uj+L#g z;1ruyB5=Qc10iG357XlQpXI*vt%2KzsS>&LRn}n?pqHx&23mt#d;qwm(DO^}4{r_4{Pf^74W!0%e1C0F_KsnFP&tnk~1P`Y0 zP3eeobK?S@M3kC_2J~Sk`_;qRrS%}k0jwjLr%EL#NKC?O$ystXW12xFBt!}XDX5AA z*elbr{(nUO-Isu${0rDyR=s9I*(mbv(HvPW>v{fxL@qe63-R|#Cd{vIZ&v|ihw?sb zlLCN|HW^9{)v7jG883R5$YX(j*tkVbD(Kj&wgm{U9zfZLYv_C7K_#Ky7#c38T{VFD zXVwKn1)F}TE(bp0w)M%+{d^BtP9t<2ajbwk>i{-4H@6ii`2s}IzaLw;41Z?=D)$<& zB+?t)rbAM%UxyYl7y~{M1a{YKtpyB3G6?M#(A6q|itsM2u#j;cfDUMNCb7!-zx11Z zBnt_wW6*xn#ztl4-+t392VYBLB~FMIjX+<2|B?F`?SOPjLmh?HQ%kqbmTP%+miGW^ zo7me?6Q-=)aTF}osmIvZ*a!&^w>YuGAQuXG9ZMhDN3WFr6`)!O1OP}-T1?bU|K`5V z7rYqVyBINZ0)N=7DWI~qitOJV@+t;eT1s|y3=rv191w&~f<;EQu(}2S&9C8MCACcu zy``nB5HTPdISPL8ciN5h5FijpaL}3Oh2E&_$ow$`%9xlKu&HPPvjZ(;fLB5CxZF2; zk3}6LCzXL9&dFjSpZ3#}#&ZRC6Ru1qe7%jmrojs=G{9cc1*l@*<>`L99AQ8RUw+1l zBo*+B61hbNiQ`GzQ3wE)>h!Ovsig0sf}^4^0T~ERfhJBKhKX|f)x}HFe}W}e@fA{+ z92R`x#4i331&BdFJ%#oG{x1yBw=kUWLSQ*wmQ-8K@xCMb=YSX6UHEd${9-?6bO_Y3nek$-f_?{RTd0HlMT15}G%qR+U4z0PSq&IpC` ziCl(=P?`onRw#mfL1qZ#IcNYPq7w%+H1K@xhy7G z0lD7+lntz?Mtqt-mNdY$4{pv76^zj-FAm_Ta&;Q&mjNGv;m>uLq zq^{dUq+CNofRCuNhf2nVK@SZc*_ZzR^V@HBeQTrxO^gvzyd|8(y-8|H4*u%5d=bpJy#? 
zTK>~EQym_EjpU48Kzu=CaFzQfR(Ta) zb;~lkwf*D;Q&F`>Jl|t6?<&QiPsQA?`%qh(0!SOFacbadm@R^`LXSWHoX3tqA-y;a4BZkI>BqoJ+_o0|VvU=RZMDx}@>o3i&N#e1LZ~HnOX1Tg zF@vrp8{kn2(^O4!r#z$oEc19vR9{}DbE?z`#&5eq|KBPAdR}=f9DFSa|GC9kSUrJy zcV@$y%SgnGP&bU@`@b(<+Pr!5CpAsssJ2@g%kifP0@c3_NB3H-%LIa3gOj0+-AuSC zL?eUO9Gc&>P(fQa%k=)E@*q+RIl`+tK8vS&vN@QR#nXS^9T~c&eZtm>pD|?r14H+n z>t(C{GTl|P;E8}nO*e>|TFF3Fgrb~ ztS@QX&I3%to(VYN8B&HU+*;hc0>#yNZl$c#)j;>XZ5C!*lS{Z`Qek|U>aBK<4k>WI zkp;<4>un@Aci(5HV?vN!ZF%Q!F)N+ULcu8~Lzk2Uw?;eH>z3=U$UP=}*`#GtPp=BZ zH`~iY!8p*vnFtoW=(#wj z9Q}I%%GC=W>W@Q9l%H!G&TmV4@11KKzUg6FsTj2MOvQf(&8%GMf-N+G;IEQmW%B~& zbJ4oUF@{f2WV(yuTyEn z!v99()3ty10Zg?C4OkTM&Gq?RhsMi;;PNPuNQ);O6J{0xPvoSfQiW;^dNCje3u50D%+w7}io6)s}t1=ub0gZ=h? z=+)83nIFa>QBksQ-+tA4*yvB70kkw|PN*Q1%*-fI90?dh+pRPgz$r90OWi@MzM`U{ z2~3DCyJ-cI_{fg!!P$n~!?G~Q_%qH|RFc>?D?PTz7*%gPsP&!*P+Sgt=^1*|oOd!R zIvmCsQxHYR8%|&J8_PddV_~Z>Ocl9{nUC!_YSvqm_4W5-6A+kI4FMUH3m7F-W`GzA zT{ATe4V`(@jlQ@B03KHMS;;GWRyq*i5fP~Y6b4UmuNcYyB+@$Kk@TUf*~m8f86Y|jgP`k_Mt0w%ptb*`&T{G|glw;m((#7N25x#C-6XLrZY?`{=Rr zcuCzmKQCoWp!bMW<23gcoQ{zpkXu!lWVPQLfUIXr;tad=~u(V`l*ZtgJD#;utL7yN_U;p&Z&b-BsY|hT{ zo^ajjaltaB*h1s{22_dWTftoB_arstiXQh5*eA)Z({X~rWJO0Ub24nu1WNKAAmUvd zco_5A+0{A#A1&wV6x8aR_SDgcUm}MICKdWzuOI=jR_J=!KO+5InK}&<(=!+t7&aUJ zTAiO^_PVYc(y2*jl*JAXgLQWc+_-S{2Yx-u{SHyHVNYIHonF(nDq6p%^t0EeNB^yq z3n9?d0)*H7xq8f^q9Qptx%QHZ*06;pmo-48Lmucbps8l{aSbEB{lnIl$=X!b}dM9+$PiNTcsp%7nS(gHnQ(r$2Y@EY*7v;(?m6!3yj zo(^i9QFwDqOvk|R56uiZx=$y~z{=DK{A0l&6zA=F$bdxwnmj(Iy&WI7+W`>p_Lm30 z+nu9YUVft06|pD`T>Vkd@OTQ_{(?7uP%zDTpif92GrO+XYyn06KYrtFnX#f%-%{Pi zAHNO&4Oy!-1Ndya`|A_Lt3ALg_|N1_mFZ07UaIt{*`2@+2|xHSzVe#2Oc@`=UUio*=y&9oy)^HP2lZ?;BJ~q+3`|3*@{J7C4}^y$Hg@q zpN69kxq(Be&CwAl$C=E+Q3m@^cpm})JPp44Q;*^H9bdWuK7U0jt)Vy2}ScU=B7^62WinAPqS%O zxuU2;-wj>f$L{aI8`LKRg><)X($H-^xjXGY9a=u7p|Q4Vff{|Wz=uAGJ8r&ye@xZC+m#>ZIcAogB3OfxZN1irZzpKr^_vz=ss_6+aWdOle}Wa6ee9MXD8aSzi8ZN4J9bI?p!;SK032hbjk&UhSWappRV(xW~tZWU0N& zB2P<<9tkpqFye 
zd|_%j=3!-r4WCx7B_5JFA)%vcdE$ETMgIL+>yM5Rm9bPxsRHi(DXcR5-eKxAddgU?)JsUlbiWW?-Uga$SC>?Q9KV?uRHe zIEu`-c1QM9Dh*$O=(aw6ym?iZ5~QC*RzY@rTE7zz7HK1eY*#*&G_Z2Y6gz?8^r=dvGh&J<&0ipy~2@`hi|=s z9r=v50j3iC*LTG}I}0gcP2ENeQxez$rIe4{Ipkj&iH} zER;fM9WI#1JVNCWfJvlm@%LoN0qXs!b5a+>i(1d*J(a7975;losyBQaa4Z(eTH{Zqqa(6U-Bq7I^68Y!1^a6;?sipw&v67 z*i(6&BOl!5x^C>JyiRyNQ<&`@s`P{2R}n_RL$XYCq6vb5cb z@X96Ac9?x^i@kp6->$*xnJGc$kyvMdMLXv?Bh#oY++t#`)Uv1Q;MNyDn5&`ko6<3u zJUTJE7%EDx$NEbT!)VoQ9%QB!Od4wAJTsrA<_F~0I^i`VcPXC25eQ6dS|-!35e!i< zV@I6#1JFF>1qdTDZ6G`)!D2z6f)gNBMn7VyRyrUJ4SA@k;g1Z^c^()~_a;N&InjiZ zc?h;Q^`At3Q^{wqeZ}#K|AQus1>8cP`B{s}a-W;9iv5Yz@x4!u>u_gamx_FHRV%tr zc2T+b{O3jIZ%9|e!12h6x)}<{#xsBv+8vokMDqpa8E8d;K)j?8h6cPL={Y^rA1_H= zl@gJ`HW4q7e7v;k)oqa_!fyUvj@r{mm@Df=Y z&H1L8!tU!Lvcyyc$f-TLQ)$&e<*aw;EZ{B*@d$nAm&Z5Ex1BcmHpvvOzbR39m{QsM zXwocZPNkfBj&FDjKOlP@psT%CgDb4_qS48cbs_pT#F$b$tl(dp=xo9BQvIzB&h-7g000bAhypF;?yN3`z9`WMT*}H`pA~Zmt?+kOkWzE&3(b7 zHdOqZMYX9NvW}R~u0iAW6!@FP`T0BYAd6uG<)eAz!IcHyNeN{t4KV>DRy9MLK;CQu zcCwaFS0=m(dYJq3qt%K<8?PWmYEj3TpQM5a+NY{Iu+FJhP*+UfkK1st|J>GejnRG0Ce~FM}`NGNby*ZID zql#8vl^TDcqtzItIlkBH7%i>7Q8l@Rf&N#;ea)(BEv`+4tjn)D@SyWv(A*7wO|mYk_KRZmhH&*S+&esol?jwJ<7)B+ABiIZxV(7t)m`)8_r zQFUg(w-w+MA396)#zA!cLopWdv`9iyTrZyO?cAAf?&MTx9WkE%6Nz8o{u%noMQK+E z%ub?^P`9N{Td(E3@Q#oSnj9Yjg|7iZsaXy?Rr{2uG4ND*R<0>CzMQx(1_YQn$F)XM zVcP3qJ8|!r*h1DiQ}*10%BxN}@aBWwGNn9DebxFb!PF<&eEZW- z4XwD-q@KrRz8b6IVUIfV;gRR})OdVU5xtI9}&$-~TNV&As)V-L=CxTt( zYRNt{vv69C)44Wkg!LiF*}mk0+$q6}7g+Qd&Vs8G?t9@G5`s@TIB;*}w`uiLt zoLj34OLL&1!0$j_tV9u{tooU`M6>$&X(CMY&xW}&GIqs@M8VY;ml@k(<7g*t93L*E zUMHa#A{PAmleHu8Cd8Un!VKGbfstID32g~ds5=K{T*tK>D|JYKigh@T$q>AYF>z-= z>*O3b=C2gm7x0%_t^9+@RbB4}tPJ|ku*a4kgLNDDf-6_T3(*?e9}SeC&Wu`WTsml%up98OkZ)Tg2Ft%L2W;$hM%4;@;ZOeG`s-x#g1jc=R7)TQHK zbsodoH!Lpqk^oT*MJH6|NV}Kb{KdtiG&Pm!vj-Vc)L+1vh_5n|tEyM+N+Q3HyVy6@ zp@PY4a2q#n;5U=n5NvsV7m2WTttG0c$N(Lk2WMl&S}}sXBe2Rf(c-c0G!1rRQ2{3D zk=t9f{NZ7ljbW>L{=kA1DzmJ%D7U{I0jRChZu|kSQNeDu959I;fmuF|aX7|oERepF zr^PYSyIJgGqc-xN#-Q3!9LSAKjWg**#KBD(#ClO||CcG@=#N&(Y~~l?=+7 
z&55dvM;sn$?-)4n`p~{lKR3kLk4W2|aEhKQVh$obul*5?3ftHAhzW1ylv4YK86oI$ zuxmtQCt9vcbZ=D^>AsZ@HmPywVqXDd_cDVq^5{Z6D$VoFB}+GyY>WfPv_;G~)$rcq zp;2&u&-PGyh^kd?#t8CXc)2qdd+Vcu-{&fg#f)63=08kKd(pz1D-2F$ML!r>dhBne`(^mjf?NJ2oPEb%Rh8v!vCDglwAAtZ0|kdx7FVE|Lze ze2!)W3dZl-%D&i-F!uU&%$6HK?!C}nqq>k`NE^H!{MPJ8`D-OW44RnaGT!#)rsZga zfqYtKS4XEpQZ#p$rM5l7LfsajZKwmhy^1~_qs(kaIuV$&!Tu0xseD`JJsF{v+yjSzgK@000!lrF+hYF z_s3@`WrzUlq(87<054Ca+u0`K6`+|g5iv-Sk&uwULjE(u0i~jNXmLVkN{Wl+)sBD_ zLekZhpOlo8({@=5kbc9vQ?hBYc11WvJD@rjhIlgKrBy%^+bkSdiSRzhgm0OPRYein zwE59{T$}kN+ayVbb!-LgL~TbqL4K=~3~b|i@)9>;-O6p;Q=W6T#*E&Ndf$Cc$ZHb5 zX?vB|Me8hwc=h-lZ#~*pTQLmZt@M<|T<^JJ*`xALB!q0sO#Jsp5k=|f5crlnxuLF4 z;1T`^e8;Z0I|Wt1Y@i~{$HxaG^e{aP3?4q}C7&l@sN=BtcDIDzZWSJS1aL)Bv#|~4 zPrV^a$hyig`SeAiTvAO+Z;q!|k<@e`Xq-b%P5tIfe*1KQJFXrf6n=Y(1t|w9Pn;eX z$$ukG-g1%P%Q#xS`=xba=0vvVgpbPY(7O@Rd5m$sXZkwmgI}x|y|$RXSKM;Im(@vc zRr9rw^Si#l^4Z@D>YozIn$k(<2Nle2r)TULdeTt*!)9{00(+)CDj$0}HBj#q8r* z#?x|)EcvhbMhEDtfR7jp49{rRStbpZ#|f5ydqJmK?iSML9=Ii$w}0>Q2`8_wl3Cqq zmszd!(WXiJhMlz#{++CDP<4(glU|u(n6Rvb?Wt~p2_4(>8tv~smxwPty!rlBwCTvx zozqN)j~Ji!Bb+G$wlCVtX{h8{S0_pzzE!vyOvPY9Wo|yg6a>l{4WIUWU~bzzj`;ik zs)tMorLnTGNPhmz2z8e0ST`a7mky`Z*yor0&ijcocJvC#0l>qdc@kDwE!VJ&nx%de zteY?CSU)6i)rli*q|Yd;gd1lzOv-bPQbV&y{yvd+7w4yIT#D5zr&d&Fif1ylb!Pfm zcuQV>yRk^p#{W&`Ds1u24->eW#2jlj0!+c|5iqRc#sNE_c*z7c$1o>pKCm-i z1Fvh9*%*E4hbDZe=}E+spIx^B0_pAV_g5^ac1p3IGJ>)V}AknK$mwDev5_=Qc-NI8O7k? zQf7k?9Mav%!{F;KE@J<{cU|vn8lE05g zo?B- z2fWR-+d60@7iiwsW=i41z$!Qj<}jeb4b+woHM9b6Q}3q_P+OB;7_c?i~yi-CHku!Y%Cgk{e^?{26dsp}QBce9XZ|47e5uyt&x zau@yj{UKPA>+|=*(%WP9&x+dV)f4fwIF!uU5Zs`Ai6X4w9%YO5X?IG}KSk*#cRO#w zvjm#w{vXDU_|1`W=XxodIPIYC+9i$BGJNEogwoxR7!{t3cYhZer8uxh(K}3F4eQNsI__4ER93)oj;dY&SL?-bXPwnRHa-z6*`Z3Vu@Mf}v!0CT*z`m9za z{Dy>>pr=`_o9KxH2OvW(gQ>1O6&0F);{4vYi`L zrs_4IeW7;-p<7g10e}CCMzj|KPuq*gL%PRb^a^kE6`3b;T&_qeu5x$_kkb9SX5Oo* zpA5>5Y^79$O_19g!80r_FyD}r66u?R07MDal(P- zRP6QZXHbtXR89ro)xk57BA$3 zdOZU0JN{I8RahHYsJbZe52Xf&afoupJqv6cX@1Jp`fvd1_~JFfcmEzSo=f%KCh4S! 
zDkMGvbZc@aqPs|^?%I8Az5zdSjUn^Qrs&|-)&feFzJSf?#pDA(&i_{WJlWXH@b+bC zq?2&*C_#;NNO5hP#-vbTjQp3v#lbmI!O4*(BVCo!%af2oV*nH~7aOsYX*P3S&-Pz2 z#tgznTfK^uq=sqLc|dWO1>24OD+Pptg0K00SDwG{x$9O$Taq6pM7ij>>s6WT@!ZkL zw9knBVPp`PFfjz9y1;D!9l8Kh{r>-1AXKrAJU+I)2X10lU=W4+i-GdK3gp7y;M+0G zir=ZJ1@w@6K!-fvEWB2~@v zE>>38!tzE&)k_U5W;4hbho4YHzFMaX5puNs#QF8B)z*n8qhaUp@N0B5dxLxJjnb`p zx$eyJB`_M!mRf?zOMdVB2Dgp9M_|re1*14NIa}VRe;7{IV>gwoLYXICREPdtkO*7e z*HohZNfM!m^xEq+573+zgn`;8v0#LqEZio^$nJX<`}AX$Ldi>-=)5s$WXo7GBLTrH zeWit+1NF9NY=LaDyci!Nm_m*gCksX*SIfOh2irkn7ZuI@nTs{Zn@`J3%z8cj0Fd;{) z52NKS$h`H3g~fsBe1dPD?BOe~FQQ6fuY1V@35Bvh^B;ssr$lCR4u!GT$iB<1PoK{3 z|C#!&IWnDTMKPO)D=w#~%0zmIzD%g-@XEJ1hK63Ql4eTsiBGpu|EV)$*(3IsU5pv8 zmEe7jHAhfQt6`qh)m2WdVI&b1d(GX0Z?D|HOVTvFqu?R4y|~|_yR2~)De?p8^+O&$ z8h3N;pjvR%7@amcNBf=V^_6Vn!xUuGuNzPE(}tt$!5<#?KKzsOA7j37qi@9{*2I#Q zotxa+O%04`8-ejYsJq~!r;x9h!^z8EXjVo=ad#;^Y{=vl7%Hj-)K9mx$6~ZV}doD(Uy$8K7U)x>hkW@Up zUsh?NJD`=)*{M633fCa>3LF2!gJ^AMYd+kc+cUACP?OEbilkpI6i8Hb8hT!~?T_VU zZ(~IEx;t;Ah%Q`wX8B6A3gWA%$Ium=2G{6NF~$VZFuB!DcCzV+Yn*sEAxtMyUTC+U zd2*xEBCH)IQUeWYQtD|NkL-F0-5rG5#0=4fY0RgsPo-4%D2!Kh@^e4x<8e`j8_s!E z(aUONJt-~xT$FlMlsaPTAop!3Jlz;uviczQ_0<{8XISEhsaQ_zhgU0)hdh6Om6obL zS=v|EE1U7Vu6>{2=3}3Uaog!x9nDKzX?_-Sk5Dt*b)Q+0P^p!biIxQEyjOjQIUD&~ z3DLFvxq9?jo)k}=%zlOI_V+O9ALMiAJ`lg-DyEnl&X8(_C-Lav*nOO8Qc;*ZvI}!8 zo+>xG@Y1t%eR`DV6^jkyn!(kwzG`#4h^*uye1~}ogx5bUGKj987X)`*agm-PxT|xr z1_xFg?{bCRkhF7sJ3yRM!4Bl2Qr;B~>;~y}xYQaxHGQ|BaU?E}D?e zK>1ie=6}?Eb>)#w?bhwJ@#r3lks7i{Db9!9MH!7Qz_ZLQWYwNC**V3}wzv&=)d0PTJ8JGOivLa{J zm}t~=9zVTyUyq%+sfu8E*Vat*F`cZ8CE+V=xnL18`ztuF#pE<_^DI45D^4Pwjbti7 z1crUhoupnO9_KPQFMSur4N4x zOizmtEv6Vqcprw{Dp5)6kMY5NQP-?`;zcO z^=+J&5%REvo11u4g4iryE|k-9ccB|>L5n8U&tju-Ny^J<{mj#e_kq7I{S!*DO+N9dH&~E(Q3hT9 z)(W^~cG#KWB=Ly-LU-*(^dpsZicd-TJk_N#A=u8f$<|fp&syBhXy4fVFnjW(ZCulr zV}e*NZ>KIiR*Rl*TJvpMK8beks?w0Ew%H#G#zqbw+K3mnQE#@oPHt3xBx%Uoi__6h zJahVRWPY`<@~d^q@-a}CT$bXltIzUor{oq z&P^MEYSnqOalb>lwnZ}O%bW(|1McY+0Pz4MY 
zYe4oO>BI$9>RlzNkv=NjIw5A(Yg+gd?LMztWLa5BYl@^$Fp0LP{Q@A7uE#wS0@>M_>zIc8NOU z;PN@efr%I&YB8jHQ%J;e~ei>v)nE{ymty3WNX{LeK6;ftbP3db*t&d+Te z>N#JLwpFNjS+(go?PI^qDQaF2=~cgtKg54!Tng)rXw&uOD0U2V^(^XXiKakoPOpMj zX~Pb(80=SNUNQWT<@orh+WO!lKP&t17q{89r>C-LZ7xhx*) zvxD|0TiaG=LC$@zF}COWXl6&|OB_td!|mf$_TTCVmeEa* zH4*6f7Vh;V5yg5{3{mgtdGlBE2ciMI%SFkfNqc@G1OM;sZ`5~>3B%;VC-xnT(3EAD zO6&s_FZea)qm36G%FC!8S6nGfo=r}?M^VV(tyL?k;3mR0y&4^o;)R9j8Cbd>^jpTM zVC>d-6)UmJ-bQ!;A{P9{bF%8^n5GCn5X5_7z7Eyd=I9rgDEUg``cGPAZkwgdAtiXVbx+aY>!0mw8dmt z!nzu$G>-k*=Z5f}jCVwa<$)o*ie~)zch)kqWvs=sOgsGp%*EvF*per9g|j`LTuJ`g zsmdYYwJ_yig1hNs(?}$s?AdMQ9V4GCH9mRrEs0PdQ;&VHAXM>I(yPaG6#*pl>5pYP|{OtSAH zJ5%!$6&MKEI_z{!+->KvGEbZ)M~sHsl76}& z-}%$-VSeY5CGd01r@?BUGP0CK`ao61!5wf6oXE6RMO zBF=89fZ4$>pIGFB4BvvLsWa1}4sfOoW^eUT3-GqFyowd@+b)l8l+;c?iCLZN z-6??bkQ)xCGJ1Q14J>R0jfY|z^=ko>zeWZM#IthC)lN@h^PG+cIFnRcwwrd2zubQl zRvjZ_L@X4K_G)F%0}=xzAmoV+N9w%jwBqfyMw!hMW<`8I{3Q)+rF3PlB4!ajad#FDpv)WhFplg9dsHHOBcjr_xRFI<_z z5#IW31b@;fo&7=tZ0j+!xyHSO6ppaMXnDu4WGz!;uBpHOL+!k^Yc)(*0}09JByK$R zOS1c|zL}4AZ^yk78}-R4NtDXC(%}=j5svr~__tb4%u-Is)Q-I!Cx8%@V2Hor0H+#d zi7TJXg=5w+P`f-kG>Ae~i<>#CWHOa7;);=e1KsLPs~?F;eG-X@mY6a&T#XX)R#%g( zUVbw)z|wcVZI{)WIqs<=d}N}Oe`BCEn5E`~lcQ%8_s@R#?*N4aaUC5dk4ZN&+r@JX z!#pPx5OQSGy6_$%@xZ}|Ks9vV#`{3g*OkygU-vQU8Xw!Vo~1xipMiqv?|WK<-A|l!lCL>eF(lyRR=07|4XM zaoOlW1nd2Wx*YR>)6#}E1u~vD9R@xo)BLmdk*_=#>0r0!!Kp+=Kt5B1G;j-JMvQPM z_`}ubvV)ADc=A)j;#T}N==|KKdL>UjN}AxV`!7uKEvbK^n3R}D%zF{2;>Kb+YluO< zoGhj~?}(aeym(U$3TuY<%c%m%Sw^4+{_RKlC0p*K_Es-Z=7#Q1A3veETU~woG3D0f z{hPfpddv_jys8=bFn#IP&1#t(S!9l&hrZa_w62-@0FoE02QQ9Jixdcm$v-$wJh?6_ z%T#l`!t{ZX+Pv!)fsUUtXMObbnF&14L@v8+tE%=hnZ>=jNBHlrORIc|jbV`L;lx$O zyZ^FlfluL|f{8D;QfU4RRUzi4YGMw-#nYiIuyGE^f>FtEr4DFl6js|Gy_tDh8K|X0vjDw5e!MWwov2z|+T<*n8^{zHkl>V{|RA(~UTxN|Jd^ zh!V>&iWTgxl(h9dqh9~0EzQjl#*?w$WE$8ajaVXVYC3ycixyFhb${am~TwR)whvKw~i`u<05pM)GvPMXXbiw@ZYFdvok3yOzLRk+KZWc zDXtrb{;vCN>y}oFy}EsHZR)!BI7S*Pk&WAVlbChamDjV2h8I4d94st6RasytGuCb5 z${*0e3Tbw6=QO96p*3-*J&c6ql$|OU*NM$V{frM^&jjPlwWx-aH{Go-jb5D?{p2ej 
z07W-?6=q*vzq!brZhiClc+!~r;ky~)Z$EwCKPleJ>hHr)E=uq>TM@||ISPK(@+4kt zPg3U|Ypr2OcAi-Mb|gj;rC^%o@YLx0qEr(8{Z7fm1VMqsnc%PO6PdkVE~D8vZ0o!~ z>A%^Dqe-b;OnzmO+%#=@()%1dCZD@iM>k$avc7fI;i};-wr*lhzm100zHEdHu0$9@|EUv7*b=XGcG=a8 z9Swn~jGyH*@0Y6;h9AhGH*9|9wTWM9>nO_jbY9Y$Dd()4QU{UyeQGntdXdJFeAV*R_hmGI2#;G4}NPj z6?Tu&(>8kSeH(#--JoUMfnISuR2%xnDQHPx8a>Y};P)?@&d{?fxcptx^PD+~t<7dP zuy$1;9h^CGCPHca+n>v-N@;Mnls7^>R5ZLia3eAx$TUH&YRy#dn=hZ)VAMke#{JJDOr~YBQgi$<@DSAPwk(yDm6k{>}-Y+dBmchzM36V0S5FWYg zpYSYC??Kj$-k18^xgieLmCxuM85lPVRB@^BUXG!)NLBDY9{S_UBu6YTe(1jExnaO^ zz?)z$Co)49H=)B#S80V8NlQl3o2k*RreiPSb=S3^j9Rbd4z8yKt31==Ao^S7FM{BZ|52zI4l8;UyYa(st}JPOA5MZS|}dd^ihtaa1IjPRz<84ZK|6 zppL!2F0WGN~~2)rCK;!)EqU^?-E%ul zT?G6A0l8K6Gi{~hz!i*RpU@-`cgzTi8Xu1Jk2+N2P12vMFHz@uu1qYzp zXF`+kQvHmlKk~q&k3%BLfuJHMDyb`L`Qb-YlffcmHtci!_d9CkR^;GKaGkpzvtWdJ zKc8Ze8^5n;Uo7-69+7I?qT%d^ifVCa*Kkwe!K;OenT)*ftwVO%()nLMqEo z(fFL;4#%f-l%Xs5N%Q5(6&ErO47jH-K7Sk7xtHE2cy+6=WM5~Gc^R9p`PIt-J%gtFO3VcOZZ`zqgB~??KEcJ3ou&|qL-wWokabziG3ahi08i+}znUT5Pogjv@ ze%yy&qyK(qD!B!>oLJjEBUyU#EFnDo=N$n3G37g;P1wiy?2vPtZRkHf05`d-@oLGs#z%V|}dScLO$+MK)* z?X<{gvq-l5mqXamg)(@NiH_tj%N;Ikb2~q%NH#~sP@`NYaUfBIYNC_>sV%uE8oind zIYOwgX!5xa8~XAH>o3wQ`$2L}1B+^90UP%^tG93V`eM1F6sh9qK zCg!+L(A2!a%b@_?ybfc-gPxqQOiG*}zNMQq2Ddf`NZZP;A?rNfeG1adPp(q;E>_h3 z`m%yfxNA6<|MyLi#{~L8qwB23$)28Qrke{5vu(lGt*L0e)RlZQ@&{I!n(UHk`)ppV>pG~8)4{=tUkg<<>Hr0E;&n*Odq19T~$FdI=`0pn_$@EMi_w| zMd5j~1ZU21joR%JdsJ+xu<3OO03uYw-Zq9n<)%l5vcVbnkH zu2dkV_X#5CDZ%7WCW##TL2qtUG&E>HkVoC?g)h~8&+fjFsE3xRE0koxnRMC5W&g93 zW(=GW?%}NJ*3UL?t9Iz~-Bzx2aoaE4Q&Lp^I4w7 zj26f7k)VKt6*doYgnFq8>r~~476c_$)LZtgHI`N9c=7)=iO)`P5`L49;`TW86-3;i zX0k#j2U^-CYHe;_*y^3G-SAabg3sG*K3pDt2Fuu%{VXl7Z(>AUxF^{ZTUn8qvlCN6 zI#M?eO}l8!`Zz`yTq4f3#XBqz_P%4ph?CoqB7NQ97T#U=JoEElBIolwi-Slxy|H9U zE|RuqH)yX1S?pKgN+5QED8nL`LtteUJ*x>e4Km6&V0_d@aPRWoTtZA>A$x88{!ine zAUSYknWr7%rOtS7{%8s}A>L+2Lm92A>hB4>ify^itOAPV6xls}swL~MG|rEGb#5(2 zPK;^h84g$ca6S%9yDJg8t2#Fm@>fv8z z@96VNG>Y{&XsgWc{j4NORcDt1_X_(FldWZUngb|mBPsQh-eR)_J 
zokM$N#hQhWTzA$fa3^b2lDB5Y>ryZtjJ|axeO|}yXj_uwkPm0?o1R>oh5R?~-MK2^ z{UbI|wqG3#mpsX#N10#_0f~lQ7xfRiH~V$z-^9|9<8LXzxLG)b30(xX)Wwl+{OBaI#b>@mEOB}ggIo!T zw%OAq**f2%?^2bRl4;W_d08N(dVbWhuH^NX?}Ydl$JuJDJ1=X2w4#WxdXkLg=hEPOD9Ls2?gSY{GNOjp?BFviXB1FmdVF6ILDBz0BCsVlSbaPPe3L z9^)XKuCau_X6O^@o{XSRI*j?$37h|eW zvs&+nCr`sF%%2YPv*Smjs2`_Yy@~rS+qpqqH~V1cI~=y!ogTR#bySi5RSioI#^+g( z5@Q5q)-jY?)6!_9_2X&VaqK7Wxt6%| z#iQP!ueisW^F5}Et?nPKZK@AQ*L`n3?Ei3Xd6;9rGa{?KC;2AbkT0%hqD@e}fvci} zd_O)^5@y`sc=0R$)6zf@TY`&?f0K|^zVcEZvm}Txs9$wh7ku}&k*Klx?P#{MbcgZn z&46t@hMr)##KcFt^;rClgp&vsj9@P8G^};pEW@w?f3lLdMK3x&+4f;conzGHT_Vsf z&rK;QxYaep&JwRrdk&4KEB3uoe)l3^Va? z{9M{YRbL+y^m2If*2@hoTvF}d5eT{Xb!X9LIlc_Px~uj@%lwQ7+thHC9tYvI?xspq zWW9Wuq^KvEM^|k6-3zc7YyU&VGp!XJ%3M@I-mAqxvwcU3E zM(AVRUq)bqfdmuh|66bI7IToDtri1g9lqTWPclZP2(vI#VWTlh3ui2@I6cb6<~#DR z!QI&L9BoVw-BT5{;euMKMmVDn!l_+*g^*mAcK~aG)!6>L`QZ0FDgXF4H2aRe*XCnm zw@;hump5c7hTAlZ*oCnguNEoN0*bjXEW>8Uz~*PoOLoq z3o&A(D&gSa@uuiW9Ja>!kE_UHX&#m}m%Ch7xoY!q651!q^^CYslD*R~aqjr(GBibC zJL**|c*nVIALgpZ<3;rvl||laDA~Jjl-9~}N6Ft;C?kPksLz7Us3=N+r-o2d#}5^9 zs9}-Q(67Rr^sbemdW=m;A|N2AVz{Am#I^8I$_VmEQpg6EE1KvL^JIVSdAc$=HssJ| z@c0YM1b}pt-FG^@mMKv>mGZ+BTTPf&ZQxGx(}GxGR~yx3d}^b3O7g+S&*T^j3cC{^;8Mt1p%umCCbbXwmi#4Yh-(Ev~Q};U8pQVNTJRLS!{}OGokLYL(+k-mRUoL3(mTz}cj^K1m z>vu024_1b5$aAq4FsAMVbkVI+zgQwKH|6=d0oA_!uWm;L0k%(^<#sMPS40AqyG+cp zB@G>=yCW-haq8q>JtV>$uD#`({mPTG97ONeTQ zj=BgOs}m4D_IY|qC@^vt-=wQm`5jEm=g(TScc9RC>5|?3>coTZcE94jnTPkGgzMG>_ZX8kA zWi8^=w}g-5W365aXIFj+YzumqsaJ^&VO)3Gt=JzWJW)ZAKWbrcK`Em)A({_bFeY0x z$DEI3o+*86kRl$ffF`fcptdryd%{LI&oRBV)lElVhFOoxDdoAOg2-z(E+D&4q7&gQ zS80JM#5(Z)R*4!%UviEiIlLA4bJ|#GKTpb)K`j$3Q>QX63tP_R^}`>a$5xqFoQtX) zqU0JQZe$MSvn?&eV)s_YMK(_eW2E^xvT}rxb#%5*DZB>9$8HCF@arEKH@3744k`Q@ z20bG8t4QajtEZg0UUVBMO7AD@cTE9BjWvG3;a9>agt101bO0w5<7;?$;H(Uc3t_`|5Txbt@yQDs6bp zku6d3qCUMtN%M9IB+Gf(quOgprJuvPLp`gu7UTtll$ih@!xxrK_LH5|0 z9P*WAZUa~Rm?9Sq9<#-|*SI#=>c&&?4l##Er36LI&)d9fsRLWro!lefIenAwoH{hx zG1zLyBi2JCaK((uA2;|1@{aG+wxA3ZHc6qss922tC`K^8G&i1-L@ClyY@@DVwek6d 
z)jgbX(*mI#EGg*Ac>l#)wGsR6-BnYKL_z9D$lOoX1~;v!g#x2>h+SIXg#B>lZLmHt8K7=mr7wEvq83^hwg z<)bH;iSxWG&*FE4*v{Hz>b5Iisix2g+-*5(n}<~L8b6WpP2M)`JK)P#SJ9u)MFo*Z z_|kRXJupIhD@j6ziVRb^N}tRur#IJqvDgdklyOMl<}ln9Z{u}b+Ed3=M-Le*CUi74 z^mDQ8D;my8?`$SGrpN4!8D)AMiuKk2PiRb^Lgj;;^a8xXdz(X4rISKHGoh;Me?mGH z&rj3b-^#D^yOR|Y6?sgAY#N(eAbHlj_iSmhU}H&_{*;wPi;n%#{aYgGPZC&!xd>4H zQ#Ktu&abiOEp$D<&*;`AUV#Bl(RimtT_nRnYWkE#gW;Cp<^ZN|fBVpix6F&LgtT>~ zlqfax)~aT27E-2fGTES8>7IzTDf5cNRx3=R94(t7hJ>q9=y&c4MCRBLmPB?yH}=uy zN9M4E=+Fk)&e`c-kx4-X7*}!D-c3z$SlehMr>F<#Xxm!PJ?65p4Wh-YOY#=kp8m*e zF?=WG)UQS^3MltHH7^Fo;D|FtXE@~|P_3T?1+ zK7~?%5cko>cPQkL$|!JNTAngl%6(%l!{>^ywnJrYTOTw^q3}_&mMH_IC+i?cP_}q5DK@zb+v4*$ORQBr2%rg%W2RE#jXN#!0bd z#SY)zieC&Vkws6mp*M+pqs$U*qb$$eU65cvQ$1a}jd%OTNG9fi2fo^3->_{tc)}@F zNyMIuROv5o_m1o0xEndnoDnMogsRWyNeIWkjEu7mL`NSV%@qZciFkNC;rFvr&G_U_ z*S~Te)@~QL(ma7jzY@2y@Kg}`?}?x>dP)#?jrBVLdYHpcMFmDe*J}-Hak_9t$=KbgZky1_%tM%O$wWez;3% zf8wd$ftX|JNjH?=G1u*O3p^8{q)w#gO&R93@PT3-I1X6-?%H+x?3)MY4+{~WY}R1Y zIHnuNs!tw5(~3mqG5Y7$y}*rxA%9SMDg-68_NIUG?_q9DVWEntC|Vv@r2|myNzcTh zFe!!DH;xh5I_skf&mX3A)@`83{fs7pkH+J(7z?ecdj6=pr}_!4RpLEL$KPb$Pc~1O zcXr-Wh~kGwL};>D-|mpofDUDZtV8(?&?ST0$6o&Aja5Q9RA@#bTlU3*+AH@7j)}%h zVg3aZ8DZ?RQyIc>@ijs(Bcr+53@8HW>v9cuo4$!?$R_kgG5B)Noou7Zo$3tzM+IC4c*8QxJEsO0c7Pxe^C)&1LF0JlQY%j!)_^nw6N)rXI9;5g-8rOA}6TK4> z0f+f_v=5ycszQ#6pxWOD;p9>_6=^E%XpPy$$D}&iAaz|c}e(P>JKU9_Gz zdf6#bCp`a=XK2K?kzhy7aP=$q5p5w|?7o+VmMLvGLc|qqx`9dM`IHy+521*MAW8Mz zY5Td|rWM&hIcR+&6_24CO8l>)9f-;!!97U5?`=@#e-7p8PEde7y}BACyjgVvfn+6( zfp{5GB^KKIwm?7r1*rc5&re!aHEj7Yz=QtOxK}P0<;I5wq}`0rX2!+n%k9c+^E@H?6?6q^GYEuFTfD_01psY3uj zLK?+sXjL2xg62YbxI*mOI%aI3h0vK1c*$M~8G@$Pomw$yZzm`TQPQqop`!AixWsPw zIb{5-s|L;OauzUEE@;0(4C}UUkZyJHYd<^sMmBcu1zNfi6*fQrqE;d|9UTfV)-toR zeE_6_#Jnw;wC^f`xZKnt^G5LY?yfI@qCjdH6&3aOf%E?sn~nfrGy6Yy4{bPXICdYp zJ`r+lrO?&yuoiqLEpDN55sM^l55L!soQ{xBBNMLRY!2B7+Ii1d@fe2LReb>Jn?_XgvcdJwX(9;_=Z!xKyJ{=497vKER06He}Y4x560 zG@cVfYcG(L5g6YG!0Q81`R`WaLTeM%6iF=B zS(+v!gAWw?py6=P?l(xX(;!8op*zMG04a|`-@!5jc}N6;vuXQWDJ1m+ 
zdWxXo<%(HiXY{@Yl8g&q4&7F$_zYPZ@87&QeKGduqGN*pU)WNpRf|yrW-11h}(*QR73cdzI*ZHCV>Ca-%}QF5mNf)$nnd zH63CKwA9NsYfb^rgkflCi0pELhFDA-9LstYFN?ep$xT@k@q72j?;L(czK-Gh#g&M! zy#KjZ#i{!AYkE4L%Y^;vAg^w~Na${sfvR?p=l-cQ93p^70p7bD489KhF5CF!7&Ncz zE~=yEeS);o2k8UyVNN}=8iND1Xa*bhLwaC~Kvy4hGWb95YFDYI5&W%q!bAQol3~w} zou};62sJvPNgQcg+qo90B*+^ZcOvuZ#$jOZ!-+8S1EmflWr`A{8ue0hf7Y$!N4s(_ zBbfF%%TcuC5LJ87?R&x{OZ>$=eO7Pp0r! z#>mxGyWz&8dFIjJA;j@c8CIGR{~Ag%?(2rbx?!|8I)`#!>_sO358W&WK9oOOSFDHJ z`Cg}MmY1O_MEX$@>3|?%POJmNY++!t8J1)jzhN>p>05Tgg=a5ooc z*!>m`{cTo^)}pOj206Txua~S>OF|(s$g|F;EqR4}vYgDMg=#1U*teRv==l!%BwT(E z{02=)k;~?u4d2z}d>q;8LTQQg4$sVnf~& z+Ye6K0%FM_{g0*hFw6&{+Hrm%b${tcpCh%vimrD%KJ8fCedYbNF?~Ju^W!2^62u-w z3$rWIG8_OwS*W-|Vbf#x!K{RwdP;J_l0a(iCAwb~Hk3MQoKoiP%6}8BEqH43b&z!B!F;nJ1?ph<kkjn0|YCB4&I|K}gy z5Z_t(92fVy{RI2zmIT~j{~!oqBIhc693C+pamh9XyBUvf0o!sBJ4o=6?btPzXVCS@84jlcE7u#tMwP&5oiILL~yO> zXfFi3v9d)ZCG9Amo<=r~v+#*ZF%t$d?PwRFPjTR{!}imNigX z3SYK&gDEd7FRur{3xq6xs+?_oPk|za0e$#yfouZT{{{^2BB9lx0$E_#0~IbMHTCs@+{DD%C5Q5%1AN1>6z!`c00H$;yWcMquC6LU7I3as1AYT4Nk}{G6Qw3gPk<(2vMTfEV^kEW=+V@b`#UWR z#-j!<%S=du49MB07lMEWk{*J@TO*!=_d^nX_P;7dbD<|BBs{KkU`IR!>HrBFltqa1Fp&{)v_PLn-OIJluHZ<5l*m%i8)@Bj8wLB7NZvGM{qiW= za9hFB>Fda!rC*U2(nqBH)BKL8`(H(8letm!%*+@z<&+S<;tbw9aQdh3o%eBuD9AZ) zUEB2rUdoOkX^dm?-(^junh}5HIU^=ui%Oa-kt_0tWo)L!>R=v{8bXBiz}Ft&C0Fb4 zx3DP%x^p{!SdJ?GC56y^YN!3w9%GMGl>y#}X2uGN&bOy1$i*==_4IRON{oX)~83lc3C zN3(eMfpd>U-oZ9y8@iTfK_P@^fE+e>{}l#gAX?D&767KV7h+%21BeGWdCfGQXWrf2 zs;Bz*k*h}yqkDPR%MR`LmFMm+)9QXkHNCt}dW9q3vX2@$eOya>w*T0N%yS5N{@(Kc zWwQcN(*I*0baHvHasPhyj>qLud<1sbx0iJIN~n^U3gIGmu(afheNrAsG{*eU{hmZ? 
zSfXl%zOF(<%*OW=-6zSSjlS4N9{uMnnY)4bw09(aNMjOWN=p$YOwzP`zIV;&)~5zV zr`mkBq4~JoxW)U2aohrlg0U}IK(YXVcF@q zHslscTn{(cIHKa>a*vLjhAsF&w!swk>$tnF`zuT|G>Ph-j$>~EsyoWw%*uDX{_`z) zlp7UBVB+_s8D3qy`lCm@AR3SSNch26Ilo`R8<-75C6ke2)0O_L zPoPh**TN8*2~4h)p@Kw4)9sliA;*n4_h{HmWCGEr0;KV&PRs_fBarRqBZjBP$C?uI zaJf2H%^cJ@=-_v75&-~?UqFC4;EyYSM9{9c0Bp~J)KJlEa76MSV|YwcfO@z*A7%K1 zlos~2j*pGu?al^hR2FLEM zC$@IC8blppK!08IcfY@%Wquka@Gd^x%WF z+qK_-L36H`?~n@;s6#xtlhn#Ef{W&oSi5XDnAbnvcgaMzWs;>!?39ca>Aoalu%m&NJ8htS3tOJ)Y-~F|o zp1d&U{Z$zWF7gnP^a19z037r3rsriPARzMss9iF#ZXoA-B2AZ=7WV5AcGLCVQ89wH z=|G)jd%bI)mMI z9H3XQJAN?Q7~?dyu6wqg9FUV*kVZL5R;PTfbg4;>w!giyJEbM=8NVJ zJz#^2fu|ZwBW8g7uSj){00&SHYwPTEI0EczVVB{zr{0jz&iel17psVfKL5DgC#%|X z2deu{-`aNm+U@3l7E^ZH_UtcWYU=#{evNTRH^2O`4Auq^ZTZ>;&P}40b1mT`RnA3& z1%{lGISa<@?U#EWtdEusf#F>5o0***fEPyT%iZb1**l`zx$))>f{H<# z1Ckfs7a)^o6W1Em9PALsx51R^l#s~r{(+^!S|%4 zrDbJiwur977T|qIh!z+E&{N9U+p{~ZvR&*n1(Uzhmk}x@1|YSb=3t6;{X@usX{zg` ze4HW7u=)G4WRH;0Hj@S4q<$j#i(){Ol$iF(d?X?x%LFWk2@q~7PB$vo*C8>N1sT2^ z!go{IBlW#Gt=g_c@aae8VCLaeSrMjhpf^o{n3B>$j9x86nF^-+VJ~3y>>#OoS|rNF z%^eXLna-hKu>=SmaN-|-yq?Slr#*a11x!u=!}sUvaL0?f>jJTO5W?b!xU<^VW#G1Y z0p6Gi5X5XipSzr{m)b)Nk3%KY|I-n!kJz#p$VQYqT_1y(Lr^jC-xz7!t_ARrJ>~_~ z=>S1tqooxu9kU4n$RBbXU1O`N?wkmV*?Uk@g^6@|UawLR$R+xe919dLoKjl}?WBmSW2 z(2VXN<6OIv5bicnQ1OZ}@vt(6{J3V}W`4{zqNb(>E1Sx$BtVV*_{iW$wqC5eR`;So zL3C4locm+fspjE_xm}MT$>tUR|9JxgF`)&aKV#nxds1atwJe@zyUP6Q1vYhC9*%Plapsno^Ln`y_tC{MWNEhESPvtP*4Yw=n(P%54;aDISM7)pFhBRp;_y3-k+() zaC~&M1kq;I3>P4PR=SgYeSCcSzdpQC4fLF}5hr=kvqppgax;93mbOkYmaoIe$mk0J zgUlD*60>L|Jo@(m9Jwrj+}~XYnYj2ToKnf}J(tengUvCIhlj^P6R9DOu;T^~XU$Q{ z_TeEv1Txt(JZXZluYtw905LfqY?E(sbN4~4)r^LQ_NAcaq-~=4(+>HpTQl;{08AgU zn5-E$digRM4mpS5t?xTqHM}e=-TwNabD@%jle3G*w6_t?cYYOHjt)Lc;9q!a%?JuuGny82#MG z*S7=AV@Hkq>2F{rDS~e_(tWX zsO_j_si2~Qq9PCMLbCfG>#Ie(k(O8t_kN%eo46Q3j$*qIG&xI zeSu3U@HBCu~&T7{e5_VkyMOVnHC`n5CoR?%QR7SqRBB??`pNn6%lkCGBK66PFbw|d{_^E6hNilD3IqWG%aD|5 zYi)f~J_)fw>ESY#%&E|sj{JiptCu*xoKz&E0q*XKx@KKJ3~ 
zWC2(x^V5TM-9n>x7vRxMV8Pwwv-pC1?%cInZrHEret+)Qu&A=L;RboKKDYo6)JdRU zy_yYd-Rvi&`4eutO8@fMP+g$bm!UEQ8$zQGJqsPN5g$K7I6^(5L*WGp+|L zACJqP$+&d`PQ~TLDcis+&8(%z5P|VHZ9PQ>z5k=Q>kMlu-PSN3#49Stqllo8nGqSK z3Ib6|augAfCenmZRBotJ6hlNH(V0;WNO7o!7LgJN5)cfL8j%5{h8`&i14s=?kQS1V zD*@8_YWU^>+xcQd(V}fAs7>)Js5~1z1pm8I8CKXgh;R(AG*12m2cf z+92ZBpkC_>)Vruxi)Z=8-N+;mssLz8Ws;??U+^P<@~>`~pDwE91Numg-ta#Nfq8+5 zvd-htH)!xGQ0nyrhX9{5HMo851)$L)m?)j%3Fmbi5;7?%$pvWd?Dlw#_Rh|p<3(-p zini-{Ntxr%JC2tW7caHSLtJj(e%)ps-Vc>ZLtLgeShVpT^X-N28Stf@OYNMlUrz)h zaXQ)o)U93A!E!0=l(LEwqf+($jN5P{LfZ^}IEr!&}yi2?T9J z!=eAYIw+ktrl?yGK@aaQ1f;)zH=XVdkSopn7e8d(o#0^NHiuCG-w+a^Qb!5%dB^`ga#cuYPd?*qt3ZcaTrieW4d6uf?9Nln>~A zb;r=quop-T_LZLGRkWXq#pCzUYpwEXSvHQ2(BVV|wok(fCG~A!3xG*gEEb!dm$w>`+Y1P3YOgf*Ncc2t4oFvUV1vqZ z94-h@1iNe3{-pxMw`M76Kth^bJkYJ{3$b8wGN=r zpqEC2f+6w$_(ayfGeu3)*m%?^WUSYGcdbntkp@TO9)e0pDG;WR+sl1?(0V-x>_^=P zve;f}%mdk*J-Eys7;W+WepAgclxTpI_CVX_&%V&_1+m8-Xejn>Zdp>x!1E{)Dk4o< z;sC=znfBjVj(hS%X84VjLcG4UTcoD1G-&qn1}qgXZhnt* zdUGJ=*#nu$4p1k1FkC@&Qa;WQnFyxy{Us3gOdC^uh;M)R^EkJU2U;HH0x-$}iMn%@ zkHv72wc-X?Xpy-xEDf={CSV(G!ZL^z@5b+GYH4*Rfpxk^j~>17{yRnK>FGIdlH1oL z+MWb4wvP{bwcqosyB7rFL_$)Mk)rw9lZ#-HnUt~xBt@#62lFKv$_xajL8-?8%|K<3 zlSACgVHT(7+9%Q3JV{+m-3-Xe>>8ukcEDE?5)ur6>ZChbMfln#2kY2dmg^9=M zPo%%q2ju?Ejc@#4535FhnsMBp+Tbp)sMa`PdH)4lMSk<6<&lvl*R-EWjgB);yx_JD zHN7n*byU@-A`WU}U&V9Ez0L{)hw$idOh2OY;73)ldknC_A^vyx^1#eocId{Z4;mJN zHML{OWwT;q(J4SN+)`n&@c@uf;3;#?m=Xe zx0x>P%m%e))Rc_5yxhKsgMt4zQ9dy6(s0qrZl6V;7yWyJtixI=smSHIdT4HWoJ}H8>Is5B

zsneC0c#H=iIf{C5%p6UQ|H~w1TkQ`G#!xLA`66|t%rw**y)nk?`2LdgG0D8lM@!Mp z+(nx--7{-isehAsLTo?$cbO<O`Cu_g5SVazMvqn*yx5+_j zG3~rT9Lh149oH-n$ZyjflXC9TrgaNTU|tuPFdB}xaI@e4EU++U12+7rJ%k*>`w8f< z%ME4cwi-`eYJ)V|wzig3+4Acx<5O9Xa6Pw`hS2UVzK^V|IC$X4E0wcFqmq73gOJ48 zuA2|-w54=fYq#83NAzU8+dz!IRVnt}Zg8TB=Vh&ar>AT5A*wykGd&8MV^WF!eX4>j zw)ZB5YX17=-#eXuKcsG2n&KTVeRk-$krlH3VYAZAYQStTdoGhfWf*XXUrLqh{it%c z4}wg)#Lhu{1B*Xredi8p#K>f5=JGZ*CB~XHhAlEh1)u%7iKKdUWcZI$^`7GUIWUiU z56Nz4glK`2dwmO+jlOD$;iV0=OzGM*=xNyv4jwj*)o_PY?p&U18Lyf=Vs=*NJ;@ZE zI82}1uBfiQ2CWMve*7pAIlGQojl(H$Z3I>2f{%4d!HwImsh)p&6PyVO@9#T5Hwo(= z$RYc(`sneQ9=~53GUH;1a0#;EQyYsIlzxrPK)Y0o-5XW*f4Qi*9@9p&))jD_u#V1rWr#3cV8~vt~>tk{oq~GJGJ+CK%r{@ihj6HgSquh3VHW6>Ee*2TE z;<;o$dE7X+(g=`#XvcA|L-W1F#KdHS4jm!yYVE1HCv6X;XL{fc=MDPeUuo*98}a#a zB?2*<>uYi+S^kv+<2kMmVW_sip2UtcU7T+->=d(GxujRzArtA9RKXVB{??rA0%>}; zFBZL89LEf(XfYh@Q$HuYJi`N$oeLK{mp{p@-QE2mlJ@bN=G$*F8=w8syVV?#V-~Sr zbCsBG07bm|);rSGwe+Msvk#xF);?STUA-B3?Pw(0v z;W@kKQ-qfn3fjP*wD4|cQY|cN<$K?pW~~mVp6oKtZ&ZXzEtnRIk;6t}iYAI-BHUF| zk+<7$v-xhrG_B>q(#GU((>QQLJE2}`rkVH!OH{w!vAh!SX(g_yMuhB^C9 z)lUCVeF#JX`jcO-+P{M<&kj+=d%9b0Y*G|=uPed^CdJxCR8WV(Hssn@_@TK(3T(t#4IHN)dA6u?!{DYB%@or_H1=qH#`R)%@@fYu~u7)TP zbV9HN8!bEh8f&jozE<6^9ZhX2$uD@k{^{(GlDrz1j#q}lujCWP1K2F={qadNF0DID z<6vAdKA2u}u{R{j(oYoixhk*w+*dII{62mYabj7hb)o#?^kq?;ut)EQyLW%?zI9WIoUi?%D7nqu0_d>pTi%w(I{= zc(5C@_D18`vr(t9rzi6-I&WtWudn;Y!mOhSj`e;F&VpUmFkiXO8JtC`eXhSpRSrMl zm1R_$T;+7NckMKy93WK&@R`TVT?J8`S6UAA1u^1zPMvB=skb@tuC^bu zWTQgh0(FNU$#fbQ^^S@r^QY?V8Dyz%%I$lyAF++6xw^Q#Y2>buzR|>WBfF=pBYV6! 
zvnUyYV}9k(H=CQ4p2{s=^92{Y!*b_39lejcvumjTik@p=%*-ke11To>^It=Rp6b7-ddF;NjgVK0f3*i&%!B)-F(s#-vgYo?6 zvR-~b|L*Ie@n7`bhYo!vXL`+}+(*5Ub2+{jwBCjAE`dC{F^f%DG&?Z(d<1&*h+5%l z9(QYeg-E^7q+=Z7vx5>{x#-KR+EftG>IJx$O_7jnVwfb-)peC3Lp&xOnZ}(A&Rl(T z$pU*k<_bP5l7I(6)&}@?p4*K6qf7A0o^fZ*uw_({)F(oR(47uhIdE2U=C*tWV_>A& zxBsmtrGnd+xhtsCODlf%g1#(F=ZCQo4G|N4?I$2#W8DR!cb7$vTLIN9hhWSJ-i@}{ zr}*={^8`WU*7={J7h!s?4Dlk7$PAbL0Fj!+8Y+LL5w!5uakDQ)N4Oa$OkJ6=_I1Af z&+!0Moyf{rB&h2bysN&(gYUcmzK~dqAQK7j+ zKoTtDn+xaa39B1!1WYJ)WoNEU&?u5cxm8Y%@)oX#-J5hlZ?T8~DOgOQ?*O1`Cl|6a zG6DewS2f)|sN}QoD~r(CRH>L9dZShG*`Z7RA{GK0efcd4F__&gQ4!hxAG|a6T&*|Ryw4*@|ausL01M}=fi~AUn(~m-TV&f;! z(RTOyiZbw$-$YA2&D44i#fyBM#Kh`g2B`(A^i5EadX_ulQ9@#rBEn;?Iwe~w9rkgQ zgCAY_`1GV`ZMcdFjC%J#_@(Kzb1l>REEWqlP6#cVAm1}Bi0Kv|crMGEDpqG8+=KHn zzx7HEj$qwOLhM)Q!3@q}#=!eOTFkNt+!nL)gd^&M=lx1uKU8(cYBt@6RP}zSNHhBn zhTga%7GtD-W_)KhRusV31o+@|E705G+)}n+!+8(c6ctk61IkheTjdmbV9CEHRci~jCF5La~(n@!?r+sMqiXv#LnjOeE8w>i<1N_6BNa!~p)lW+Hk1@p-i;wz@! z0T@e3eYbMINt$nxqP?VbE}4&pM}L$a>sZSE{~T^eWT{iwa&OAnD5)JbRt{H4mv8_2 EKk9Go;Q#;t literal 0 HcmV?d00001 diff --git a/docs/index.rst b/docs/index.rst index e199ee0b..e5a8dc54 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -57,6 +57,7 @@ Contributions are very welcome. If you have bug reports or feature requests or q :caption: User Documentation install + workflow parsing preprocessing estimators diff --git a/docs/visualisation.rst b/docs/visualisation.rst index 549631c0..d18e1648 100644 --- a/docs/visualisation.rst +++ b/docs/visualisation.rst @@ -12,6 +12,7 @@ visualisation tools to help user to judge the estimate. plot_mbar_overlap_matrix plot_ti_dhdl plot_dF_state + plot_convergence .. _plot_overlap_matrix: @@ -131,6 +132,53 @@ Will give a plot looks like this A bar plot of the free energy differences evaluated between pairs of adjacent states via several methods, with corresponding error estimates for each method. +.. 
_plot_convergence: + +Forward and Backward Convergence +-------------------------------- +One way of determining the simulation end point is to plot the forward and +backward convergence of the estimate using +:func:`~alchemlyb.visualisation.plot_convergence`. + +Note that this is just a plotting function that draws a [Klimovich2015]_ style +convergence plot. The user needs to provide the forward and backward data lists +and the corresponding errors. :: + + >>> import pandas as pd + >>> from alchemtest.gmx import load_benzene + >>> from alchemlyb.parsing.gmx import extract_u_nk + >>> from alchemlyb.estimators import MBAR + + >>> bz = load_benzene().data + >>> data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] + >>> forward = [] + >>> forward_error = [] + >>> backward = [] + >>> backward_error = [] + >>> num_points = 10 + >>> for i in range(1, num_points+1): + >>> # Do the forward + >>> slice = int(len(data)/num_points*i) + >>> u_nk_coul = pd.concat([data[:slice] for data in data_list]) + >>> estimate = MBAR().fit(u_nk_coul) + >>> forward.append(estimate.delta_f_.iloc[0,-1]) + >>> forward_error.append(estimate.d_delta_f_.iloc[0,-1]) + >>> # Do the backward + >>> u_nk_coul = pd.concat([data[-slice:] for data in data_list]) + >>> estimate = MBAR().fit(u_nk_coul) + >>> backward.append(estimate.delta_f_.iloc[0,-1]) + >>> backward_error.append(estimate.d_delta_f_.iloc[0,-1]) + + >>> from alchemlyb.visualisation import plot_convergence + >>> ax = plot_convergence(forward, forward_error, backward, backward_error) + >>> ax.figure.savefig('dF_t.pdf') + +This will give a plot that looks like this + +.. figure:: images/dF_t.png + + A convergence plot showing that the forward and backward estimates have + fully converged. .. [Klimovich2015] Klimovich, P.V., Shirts, M.R. & Mobley, D.L. Guidelines for the analysis of free energy calculations.
J Comput Aided Mol Des 29, 397–411 diff --git a/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst b/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst new file mode 100644 index 00000000..fa72d19c --- /dev/null +++ b/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst @@ -0,0 +1,19 @@ +.. _visualisation_plot_convergence: + +Plot the Forward and Backward Convergence +========================================= + +The function :func:`~alchemlyb.visualisation.plot_convergence` allows +the user to visualise the convergence by plotting the free energy change +computed using the equilibrated snapshots between the proper target time frames +in both forward (data points are stored in `forward` and `forward_error`) and +reverse (data points are stored in `backward` and `backward_error`) directions. +The unit in the y axis could be labelled to other units by setting *units*, +which by default is kBT. The user can pass :class:`matplotlib.axes.Axes` into +the function to have the convergence drawn on a specific axes. + +Please check :ref:`How to plot convergence ` for usage. + +API Reference +------------- +.. autofunction:: alchemlyb.visualisation.plot_convergence \ No newline at end of file diff --git a/docs/visualisation/alchemlyb.visualisation.plot_dF_state.rst b/docs/visualisation/alchemlyb.visualisation.plot_dF_state.rst index d13c1e32..14bc2049 100644 --- a/docs/visualisation/alchemlyb.visualisation.plot_dF_state.rst +++ b/docs/visualisation/alchemlyb.visualisation.plot_dF_state.rst @@ -25,7 +25,7 @@ The figure could be plotted in *portrait* or *landscape* mode by setting the The user could pass a list of strings to `labels` to name the :class:`~alchemlyb.estimators` or a list of strings to `colors` to color the estimators differently. The unit in the y axis could be labelled to other -units by setting `units`, which by default is kcal/mol. +units by setting `units`, which by default is kBT. 
Please check :ref:`How to plot dF states ` for a complete example. diff --git a/docs/visualisation/alchemlyb.visualisation.plot_ti_dhdl.rst b/docs/visualisation/alchemlyb.visualisation.plot_ti_dhdl.rst index d4247515..3b1a3faa 100644 --- a/docs/visualisation/alchemlyb.visualisation.plot_ti_dhdl.rst +++ b/docs/visualisation/alchemlyb.visualisation.plot_ti_dhdl.rst @@ -11,7 +11,7 @@ When custom labels are desirable, the user could pass a list of strings to the *labels* for labelling each alchemical transformation differently. The color of each alchemical transformation could also be set by passing a list of color string to the *colors*. The unit in the y axis could be labelled to other units -by setting *units*, which by default is kcal/mol. The user can pass +by setting *units*, which by default is kBT. The user can pass :class:`matplotlib.axes.Axes` into the function to have the dhdl drawn on a specific axes. diff --git a/docs/workflow.rst b/docs/workflow.rst new file mode 100644 index 00000000..2a68f5d2 --- /dev/null +++ b/docs/workflow.rst @@ -0,0 +1,109 @@ +Automatic workflow +================== +Though **alchemlyb** is a library offering great flexibility in deriving free +energy estimates, it also provides an easy pipeline that is similar to +`Alchemical Analysis `_ and a +step-by-step version that allows more flexibility. + +Fully Automatic analysis +------------------------ +An interface similar to +`Alchemical Analysis `_ +could be executed with a single command.
:: + + >>> import os + >>> from alchemtest.gmx import load_ABFE + >>> from alchemlyb.workflows import ABFE + >>> # Obtain the path of the data + >>> dir = os.path.dirname(load_ABFE()['data']['complex'][0]) + >>> print(dir) + 'alchemtest/gmx/ABFE/complex' + >>> workflow = ABFE(units='kcal/mol', software='Gromacs', dir=dir, + >>> prefix='dhdl', suffix='xvg', T=298, skiptime=10, + >>> uncorr='dhdl', threshold=50, + >>> methods=('mbar', 'bar', 'ti'), out='./', + >>> resultfilename='result.out', overlap='O_MBAR.pdf', + >>> breakdown=True, forwrev=10, log='result.log') + +This would give the free energy estimate using all of +:class:`~alchemlyb.estimators.TI`, :class:`~alchemlyb.estimators.BAR`, +:class:`~alchemlyb.estimators.MBAR` and the result will be written to the text +file `result.out`. :: + + ------------ --------------------- --------------------- --------------------- + States MBAR (kcal/mol) BAR (kcal/mol) TI (kcal/mol) + ------------ --------------------- --------------------- --------------------- + 0 -- 1 0.041 +- 0.001 0.041 +- 0.001 0.041 +- 0.001 + 1 -- 2 0.056 +- 0.001 0.055 +- 0.001 0.056 +- 0.001 + 2 -- 3 0.082 +- 0.001 0.082 +- 0.002 0.083 +- 0.002 + ... + 26 -- 27 0.766 +- 0.007 0.768 +- 0.010 0.770 +- 0.010 + 27 -- 28 0.694 +- 0.008 0.691 +- 0.011 0.690 +- 0.010 + 28 -- 29 0.620 +- 0.010 0.616 +- 0.011 0.625 +- 0.011 + ------------ --------------------- --------------------- --------------------- + coul: 6.290 +- 0.021 6.168 +- 0.026 6.168 +- 0.030 + vdw: 13.872 +- 0.061 13.852 +- 0.037 13.877 +- 0.066 + bonded: 1.469 +- 0.009 1.447 +- 0.003 1.461 +- 0.013 + TOTAL: 21.631 +- 0.064 21.467 +- 0.054 21.506 +- 0.074 + +The :ref:`overlap matrix for the MBAR estimator ` will be +plotted and saved to `O_MBAR.pdf`. + +The :ref:`dHdl for TI ` will be plotted to `dhdl_TI.pdf`. + +The :ref:`dF states ` will be plotted to `dF_state.pdf` in +portrait mode and `dF_state_long.pdf` in landscape mode.
+ +The forward and backward convergence will be plotted to `dF_t.pdf` using +:class:`~alchemlyb.estimators.MBAR`. + +.. currentmodule:: alchemlyb.workflows + +.. autoclass:: ABFE + :noindex: + +Semi-automatic analysis +----------------------- +The same analysis could also be performed in steps, allowing access and modification +to the data generated at each stage of the analysis. :: + + >>> import os + >>> from alchemtest.gmx import load_ABFE + >>> from alchemlyb.workflows import ABFE + >>> # Obtain the path of the data + >>> dir = os.path.dirname(load_ABFE()['data']['complex'][0]) + >>> print(dir) + 'alchemtest/gmx/ABFE/complex' + >>> # Load the data + >>> workflow = ABFE(software='Gromacs', dir=dir, + >>> prefix='dhdl', suffix='xvg', T=298, out='./', + >>> log='result.log') + >>> # Set the unit. + >>> workflow.update_units('kcal/mol') + >>> # Decorrelate the data. + >>> workflow.preprocess(skiptime=10, uncorr='dhdl', threshold=50) + >>> # Run the estimator + >>> workflow.estimate(methods=('mbar', 'bar', 'ti')) + >>> # write the result + >>> workflow.write(resultfilename='result.out') + >>> # Plot the overlap matrix + >>> workflow.plot_overlap_matrix(overlap='O_MBAR.pdf') + >>> # Plot the dHdl for TI + >>> workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') + >>> # Plot the dF states + >>> workflow.plot_dF_state(dF_state='dF_state.pdf') + >>> # Convergence analysis + >>> workflow.check_convergence(10, dF_t='dF_t.pdf') + + +.. currentmodule:: alchemlyb.workflows.ABFE + +.. autofunction:: update_units +.. autofunction:: preprocess +.. autofunction:: estimate +.. autofunction:: write +.. autofunction:: plot_overlap_matrix +.. autofunction:: plot_ti_dhdl +.. autofunction:: plot_dF_state +..
autofunction:: check_convergence + diff --git a/src/alchemlyb/__init__.py b/src/alchemlyb/__init__.py index 74f4e668..e44e9d15 100644 --- a/src/alchemlyb/__init__.py +++ b/src/alchemlyb/__init__.py @@ -1,4 +1,3 @@ - from ._version import get_versions __version__ = get_versions()['version'] del get_versions diff --git a/src/alchemlyb/preprocessing/subsampling.py b/src/alchemlyb/preprocessing/subsampling.py index e7fe88b8..21203d5b 100644 --- a/src/alchemlyb/preprocessing/subsampling.py +++ b/src/alchemlyb/preprocessing/subsampling.py @@ -57,7 +57,7 @@ def slicing(df, lower=None, upper=None, step=None, force=False): def statistical_inefficiency(df, series=None, lower=None, upper=None, step=None, - conservative=True): + conservative=True, drop_duplicates=True, sort=True): """Subsample a DataFrame based on the calculated statistical inefficiency of a timeseries. @@ -82,6 +82,10 @@ def statistical_inefficiency(df, series=None, lower=None, upper=None, step=None, intervals (the default). ``False`` will sample at non-uniform intervals to closely match the (fractional) statistical_inefficieny, as implemented in :func:`pymbar.timeseries.subsampleCorrelatedData`. + drop_duplicates : bool + Drop the duplicated lines based on time. + sort : bool + Sort the Dataframe based on the time column. 
Returns ------- @@ -119,13 +123,47 @@ def statistical_inefficiency(df, series=None, lower=None, upper=None, step=None, """ if _check_multiple_times(df): - raise KeyError("Duplicate time values found; statistical inefficiency " - "only works on a single, contiguous, " - "and sorted timeseries.") + if drop_duplicates: + if isinstance(df, pd.Series): + # remove the duplicate based on time + drop_duplicates_series = df.reset_index('time', name='').\ + drop_duplicates('time') + # Rest the time index + lambda_names = drop_duplicates_series.index.names + df = drop_duplicates_series.set_index('time', append=True).\ + reorder_levels(['time', *lambda_names]) + else: + # remove the duplicate based on time + drop_duplicates_df = df.reset_index('time').drop_duplicates('time') + # Rest the time index + lambda_names = drop_duplicates_df.index.names + df = drop_duplicates_df.set_index('time', append=True).\ + reorder_levels(['time', *lambda_names]) + + # Do the same withing with the series + if series is not None: + # remove the duplicate based on time + drop_duplicates_series = series.reset_index('time', name='').\ + drop_duplicates('time') + # Rest the time index + lambda_names = drop_duplicates_series.index.names + series = drop_duplicates_series.set_index('time', append=True).\ + reorder_levels(['time', *lambda_names]) + + else: + raise KeyError("Duplicate time values found; statistical inefficiency " + "only works on a single, contiguous, " + "and sorted timeseries.") if not _check_sorted(df): - raise KeyError("Statistical inefficiency only works as expected if " - "values are sorted by time, increasing.") + if sort: + df = df.sort_values('time') + + if series is not None: + series = series.sort_values('time') + else: + raise KeyError("Statistical inefficiency only works as expected if " + "values are sorted by time, increasing.") if series is not None: series = slicing(series, lower=lower, upper=upper, step=step) diff --git a/src/alchemlyb/tests/test_workflow.py 
b/src/alchemlyb/tests/test_workflow.py index 2d441440..d3085f63 100644 --- a/src/alchemlyb/tests/test_workflow.py +++ b/src/alchemlyb/tests/test_workflow.py @@ -3,16 +3,30 @@ import pytest import os -from alchemlyb.workflows.abfe import ABFE +from alchemlyb.workflows import ABFE from alchemtest.gmx import load_ABFE, load_expanded_ensemble_case_1 -data = load_ABFE() -dir = os.path.dirname(data['data']['complex'][0]) -workflow = ABFE(dir=dir, T=310) -workflow.preprocess() -workflow.estimate() -workflow.write() -workflow.plot_overlap_matrix() -workflow.plot_ti_dhdl() -workflow.plot_dF_state() -workflow.check_convergence(10) \ No newline at end of file +# data = load_ABFE() +# dir = os.path.dirname(data['data']['complex'][0]) +# workflow = ABFE(dir=dir, T=310) +# workflow.preprocess() +# workflow.estimate() +# workflow.write() +# workflow.plot_overlap_matrix() +# workflow.plot_ti_dhdl() +# workflow.plot_dF_state() +# workflow.check_convergence(10) +# +# workflow = ABFE(dir=dir, T=310) + +def test_full_automatic(): + # Obtain the path of the data + dir = os.path.dirname(load_ABFE()['data']['complex'][0]) + workflow = ABFE(units='kcal/mol', software='Gromacs', dir=dir, + prefix='dhdl', suffix='xvg', T=310, skiptime=10, + uncorr='dhdl', threshold=50, + methods=('mbar', 'bar', 'ti'), out='./', + resultfilename='result.out', overlap='O_MBAR.pdf', + breakdown=True, forwrev=10, log='result.log') + print(workflow.convergence) +test_full_automatic() \ No newline at end of file diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index 4197e0dd..f03f28c9 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -4,8 +4,34 @@ def plot_convergence(forward, forward_error, backward, backward_error, units='kBT', ax=None): - """Plots the free energy change computed using the equilibrated snapshots between the proper target time frames (f_ts and r_ts) - in both forward (data points are 
stored in F_df and F_ddf) and reverse (data points are stored in R_df and R_ddf) directions.""" + """Plot the forward and backward convergence. + + Parameters + ---------- + forward : List + A list of free energy estimate from the first X% of data. + forward_error : List + A list of error from the first X% of data. + backward : List + A list of free energy estimate from the last X% of data. + backward_error : List + A list of error from the last X% of data. + units : str + The label for the unit of the estimate. Default: 'kBT' + ax : matplotlib.axes.Axes + Matplotlib axes object where the plot will be drawn on. If ax=None, + a new axes will be generated. + + Returns + ------- + matplotlib.axes.Axes + An axes with the forward and backward convergence drawn. + + Note + ---- + The code is taken and modified from + : `Alchemical Analysis `_ + """ if ax is None: fig, ax = plt.subplots(figsize=(8, 6)) diff --git a/src/alchemlyb/visualisation/dF_state.py b/src/alchemlyb/visualisation/dF_state.py index 4366c94e..27662579 100644 --- a/src/alchemlyb/visualisation/dF_state.py +++ b/src/alchemlyb/visualisation/dF_state.py @@ -43,8 +43,8 @@ def plot_dF_state(estimators, labels=None, colors=None, units='kBT', matplotlib.figure.Figure An Figure with the dF states drawn. - Notes - ----- + Note + ---- The code is taken and modified from : `Alchemical Analysis `_ diff --git a/src/alchemlyb/visualisation/mbar_matrix.py b/src/alchemlyb/visualisation/mbar_matrix.py index 682b8555..3b5e9204 100644 --- a/src/alchemlyb/visualisation/mbar_matrix.py +++ b/src/alchemlyb/visualisation/mbar_matrix.py @@ -34,8 +34,8 @@ def plot_mbar_overlap_matrix(matrix, skip_lambda_index=[], ax=None): matplotlib.axes.Axes An axes with the overlap matrix drawn. 
- Notes - ----- + Note + ---- The code is taken and modified from : `Alchemical Analysis `_ diff --git a/src/alchemlyb/visualisation/ti_dhdl.py b/src/alchemlyb/visualisation/ti_dhdl.py index 75d62e6f..d56b9b71 100644 --- a/src/alchemlyb/visualisation/ti_dhdl.py +++ b/src/alchemlyb/visualisation/ti_dhdl.py @@ -41,8 +41,8 @@ def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kBT', matplotlib.axes.Axes An axes with the TI dhdl drawn. - Notes - ----- + Note + ---- The code is taken and modified from : `Alchemical Analysis `_ diff --git a/src/alchemlyb/workflows/__init__.py b/src/alchemlyb/workflows/__init__.py index e69de29b..9074a415 100644 --- a/src/alchemlyb/workflows/__init__.py +++ b/src/alchemlyb/workflows/__init__.py @@ -0,0 +1 @@ +from .abfe import ABFE \ No newline at end of file diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 4dde2429..3bfd5d62 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -14,11 +14,76 @@ class ABFE(): + '''Alchemical Analysis style automatic workflow. + + Parameters + ---------- + units : str + The unit used for printing and plotting results. {'kcal/mol', 'kJ/mol', + 'kBT'} + software : str + The software used for generating input. {'Gromacs', 'amber', 'namd', + 'gomc'} + dir : str + Directory in which data files are stored. Default: './'. + prefix : str + Prefix for datafile sets. Default: 'dhdl'. + suffix : str + Suffix for datafile sets. Default: 'xvg'. + T : float + Temperature in K. Default: 298. + skiptime : float + Discard data prior to this specified time as 'equilibration' data. Units + picoseconds. Default: 0. + uncorr : str + The observable to be used for the autocorrelation analysis; either + 'dhdl_all' (obtained as a sum over all energy components) or 'dhdl' + (obtained as a sum over those energy components that are changing) or + 'dE'. In the latter case the energy differences dE_{i,i+1} (dE_{i,i-1} + for the last lambda) are used. 
Default: None (skipping this step). + threshold : int + Proceed with correlated samples if the number of uncorrelated samples is + found to be less than this number. If 0 is given, the time series + analysis will not be performed at all. Default: 50. + methods : str + A list of the methods to esitimate the free energy with. Default: None. + out : str + Directory in which the output files produced by this script will be + stored. Default: './'. + resultfilename : str + custom defined result filename. Default: None. (not writing the result) + overlap : str + The filename for the plot of overlap matrix. Default: None. (not + plotting). + breakdown : bool + Plot the free energy differences evaluated for each pair of adjacent + states for all methods, including the dH/dlambda curve for TI. Default: + None. (not plotting). + forwrev : int + Plot the free energy change as a function of time in both directions, + with the specified number of points in the time plot. The number of time + points (an integer) must be provided. Default: None. (not doing + convergence analysis). + log : str + The filename of the log file. Default: 'result.log' + + Attributes + ---------- + logger : Logger + The logging object. + file_list : list + The list of filenames sorted by the lambda state. + u_nk_list : list + The list of u_nk read from the files. + dHdl_list : list + The list of dHdl read from the files. 
+ ''' def __init__(self, units='kcal/mol', software='Gromacs', dir='./', - prefix='dhdl', suffix='xvg', T=298, skiptime=None, uncorr=None, - threshold=50, estimator=None, out='./', resultfilename=None, + prefix='dhdl', suffix='xvg', T=298, skiptime=0, uncorr=None, + threshold=50, methods=None, out='./', resultfilename=None, overlap=None, breakdown=None, forwrev=None, log='result.log'): + logging.basicConfig(filename=log, level=logging.INFO) self.logger = logging.getLogger('alchemlyb.workflows.ABFE') self.logger.info('Initialise Alchemlyb ABFE Workflow') @@ -27,7 +92,7 @@ def __init__(self, units='kcal/mol', software='Gromacs', dir='./', self.T = T self.out = out - self._update_units(units) + self.update_units(units) self.logger.info('Finding files with prefix: {}, suffix: {} under ' 'directory {} produced by {}'.format(prefix, suffix, @@ -103,14 +168,14 @@ def __init__(self, units='kcal/mol', software='Gromacs', dir='./', self.u_nk_list = [u_nk_list[i] for i in index_list] self.dHdl_list = [dHdl_list[i] for i in index_list] - if skiptime is not None and uncorr is not None: + if uncorr is not None: self.preprocess(skiptime=skiptime, uncorr=uncorr, threshold=threshold) - if estimator is not None: - self.estimate(estimator) + if methods is not None: + self.estimate(methods) if resultfilename is not None: - self.write(estimator, resultfilename=resultfilename, units=units) + self.write(resultfilename=resultfilename) if overlap is not None: self.plot_overlap_matrix(overlap) @@ -121,8 +186,28 @@ def __init__(self, units='kcal/mol', software='Gromacs', dir='./', self.plot_dF_state(dF_state='dF_state_long.pdf', orientation='landscape') + if forwrev is not None: + self.check_convergence(forwrev, estimator='mbar', dF_t='dF_t.pdf') + + def update_units(self, units): + '''Update the plot and text output to the selected unit. + + Parameters + ---------- + units : str + The unit used for printing and plotting results. 
{'kcal/mol', + 'kJ/mol', 'kBT'} + + Attributes + ---------- + scaling_factor : float + The scaling factor to change the unit from kBT to the selected unit. - def _update_units(self, units): + Note + ---- + The internal representations are all in kBT. This function only changes + the unit when outputting text file or plotting the results. + ''' if units is not None: self.logger.info('Set unit to {}.'.format(units)) if units == 'kBT': @@ -139,6 +224,32 @@ def _update_units(self, units): self.units = units def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): + '''Preprocess the data by removing the equilibration time and + decorrelate the date. + + Parameters + ---------- + skiptime : float + Discard data prior to this specified time as 'equilibration' data. + Units picoseconds. Default: 0. + uncorr : str + The observable to be used for the autocorrelation analysis; either + 'dhdl_all' (obtained as a sum over all energy components) or 'dhdl' + (obtained as a sum over those energy components that are changing) + or 'dE'. In the latter case the energy differences dE_{i,i+1} + (dE_{i,i-1} for the last lambda) are used. Default: `dhdl` + threshold : int + Proceed with correlated samples if the number of uncorrelated + samples is found to be less than this number. If 0 is given, the + time series analysis will not be performed at all. Default: 50. + + Attributes + ---------- + u_nk_sample_list : list + The list of u_nk after decorrelation. + dHdl_sample_list : list + The list of dHdl after decorrelation. 
+ ''' self.logger.info('Start preprocessing with skiptime of {} ' 'uncorrelation method of {} and ' 'threshold of {}'.format(skiptime, uncorr, threshold)) @@ -202,13 +313,26 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): '{}.'.format(len(subsample), index)) self.dHdl_sample_list.append(dHdl) - def estimate(self, estimators=('mbar', 'bar', 'ti')): + def estimate(self, methods=('mbar', 'bar', 'ti')): + '''Estimate the free energy using the selected estimator. + + Parameters + ---------- + methods : str + A list of the methods to esitimate the free energy with. Default: + ['TI', 'BAR', 'MBAR']. + + Attributes + ---------- + estimator : list + The list of estimators. + ''' # Make estimators into a tuple - if isinstance(estimators, str): - estimators = (estimators, ) + if isinstance(methods, str): + methods = (methods, ) self.logger.info( - 'Start running estimator: {}.'.format(','.join(estimators))) + 'Start running estimator: {}.'.format(','.join(methods))) self.estimator = {} # Use unprocessed data if preprocess is not performed. try: @@ -227,7 +351,7 @@ def estimate(self, estimators=('mbar', 'bar', 'ti')): self.logger.info( 'A total {} lines of u_nk is used.'.format(len(u_nk))) - for estimator in estimators: + for estimator in methods: if estimator.lower() == 'mbar' and len(u_nk) > 0: self.logger.info('Run MBAR estimator.') self.estimator['mbar'] = MBAR().fit(u_nk) @@ -243,8 +367,15 @@ def estimate(self, estimators=('mbar', 'bar', 'ti')): self.logger.warning( '{} is not a valid estimator.'.format(estimator)) - def write(self, resultfilename='result.out', units=None): - self._update_units(units) + def write(self, resultfilename='result.out'): + '''Write the result into a text file. + + Parameters + ---------- + resultfilename : str + A list of the methods to esitimate the free energy with. Default: + ['TI', 'BAR', 'MBAR']. 
+ ''' # Write estimate self.logger.info('Write the estimate as txt file to {} under {} ' @@ -328,6 +459,22 @@ def write(self, resultfilename='result.out', units=None): f.write('\n'.join([' '.join(line) for line in result_out])) def plot_overlap_matrix(self, overlap='O_MBAR.pdf', ax=None): + '''Plot the overlap matrix for MBAR estimator using + :func:`~alchemlyb.visualisation.plot_mbar_overlap_matrix`. + + Parameters + ---------- + overlap : str + The filename for the plot of overlap matrix. Default: 'O_MBAR.pdf' + ax : matplotlib.axes.Axes + Matplotlib axes object where the plot will be drawn on. If ax=None, + a new axes will be generated. + + Returns + ------- + matplotlib.axes.Axes + An axes with the overlap matrix drawn. + ''' self.logger.info('Plot overlap matrix.') if 'mbar' in self.estimator: ax = plot_mbar_overlap_matrix(self.estimator['mbar'].overlap_matrix, @@ -340,9 +487,29 @@ def plot_overlap_matrix(self, overlap='O_MBAR.pdf', ax=None): self.logger.warning('MBAR estimator not found. ' 'Overlap matrix not plotted.') - def plot_ti_dhdl(self, dhdl_TI='dhdl_TI.pdf', units=None, labels=None, - colors=None, ax=None): - self._update_units(units) + def plot_ti_dhdl(self, dhdl_TI='dhdl_TI.pdf', labels=None, colors=None, + ax=None): + '''Plot the dHdl for TI estimator using + :func:`~alchemlyb.visualisation.plot_ti_dhdl`. + + Parameters + ---------- + dhdl_TI : str + The filename for the plot of TI dHdl. Default: 'dhdl_TI.pdf' + labels : List + list of labels for labelling all the alchemical transformations. + colors : List + list of colors for plotting all the alchemical transformations. + Default: ['r', 'g', '#7F38EC', '#9F000F', 'b', 'y'] + ax : matplotlib.axes.Axes + Matplotlib axes object where the plot will be drawn on. If ax=None, + a new axes will be generated. + + Returns + ------- + matplotlib.axes.Axes + An axes with the TI dhdl drawn. 
+ ''' self.logger.info('Plot TI dHdl.') if 'ti' in self.estimator: ax = plot_ti_dhdl(self.estimator['ti'], units=self.units, @@ -353,8 +520,28 @@ def plot_ti_dhdl(self, dhdl_TI='dhdl_TI.pdf', units=None, labels=None, ''.format(dhdl_TI, self.out)) def plot_dF_state(self, dF_state='dF_state.pdf', labels=None, colors=None, - units=None, orientation='portrait', nb=10): - self._update_units(units) + orientation='portrait', nb=10): + '''Plot the dF states using + :func:`~alchemlyb.visualisation.plot_dF_state`. + + Parameters + ---------- + dF_state : str + The filename for the plot of dF states. Default: 'dF_state.pdf' + labels : List + list of labels for labelling different estimators. + colors : List + list of colors for plotting different estimators. + orientation : string + The orientation of the figure. Can be `portrait` or `landscape` + nb : int + Maximum number of dF states in one row in the `portrait` mode + + Returns + ------- + matplotlib.figure.Figure + An Figure with the dF states drawn. + ''' self.logger.info('Plot dF states.') fig = plot_dF_state(self.estimator.values(), labels=labels, colors=colors, units=self.units, @@ -365,8 +552,46 @@ def plot_dF_state(self, dF_state='dF_state.pdf', labels=None, colors=None, ''.format(dF_state, self.out)) def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', - units=None): - self._update_units(units) + ax=None): + '''Compute the forward and backward convergence and plotted with + :func:`~alchemlyb.visualisation.plot_convergence`. + + Parameters + ---------- + forwrev : int + Plot the free energy change as a function of time in both + directions, with the specified number of points in the time plot. + The number of time points (an integer) must be provided. + estimator : str + The estimator used for convergence analysis. Default: 'mbar' + dF_t : str + The filename for the plot of convergence. Default: 'dF_t.pdf' + ax : matplotlib.axes.Axes + Matplotlib axes object where the plot will be drawn on. 
If ax=None, + a new axes will be generated. + + Attributes + ---------- + convergence : DataFrame + The DataFrame with convergence data. :: + + Forward (kBT) F. Error (kBT) Backward (kBT) B. Error (kBT) + 0 33.988935 0.334676 35.666128 0.324426 + 1 35.075489 0.232150 35.382850 0.230944 + 2 34.919988 0.190424 35.156028 0.189489 + 3 34.929927 0.165316 35.242255 0.164400 + 4 34.957007 0.147852 35.247704 0.147191 + 5 35.003660 0.134952 35.214658 0.134458 + 6 35.070199 0.124956 35.178422 0.124664 + 7 35.019853 0.116970 35.096870 0.116783 + 8 35.035123 0.110147 35.225907 0.109742 + 9 35.113417 0.104280 35.113417 0.104280 + + Returns + ------- + matplotlib.axes.Axes + An axes with the convergence drawn. + ''' self.logger.info('Start convergence analysis.') self.logger.info('Check data availability.') @@ -469,3 +694,4 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', np.array(backward_error_list) * self.scaling_factor, units=self.units) ax.figure.savefig(join(self.out, dF_t)) + return ax From d4154278fced9fc42f724b51162820775615a23f Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 22 Mar 2021 16:36:59 +0000 Subject: [PATCH 006/123] add test --- .gitignore | 2 +- src/alchemlyb/estimators/ti_.py | 24 ++- src/alchemlyb/tests/test_workflow.py | 294 ++++++++++++++++++++++++--- src/alchemlyb/workflows/abfe.py | 62 ++++-- 4 files changed, 326 insertions(+), 56 deletions(-) diff --git a/.gitignore b/.gitignore index b2bbd452..c0335e78 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,6 @@ .vscode *.DS_Store build -.idea +.idea/ docs/_build/ src/alchemlyb.egg-info/ diff --git a/src/alchemlyb/estimators/ti_.py b/src/alchemlyb/estimators/ti_.py index 171102e4..4335fe7b 100644 --- a/src/alchemlyb/estimators/ti_.py +++ b/src/alchemlyb/estimators/ti_.py @@ -115,14 +115,16 @@ def separate_dhdl(self): Returns ---------- dHdl_list : list - A list of DataFrame such that dHdl_list[k][n] is the potential + A list of Series such that dHdl_list[k][n] is the 
potential energy gradient with respect to lambda for each configuration n and lambda k. """ if len(self.dhdl.index.names) == 1: - # If only one column is present - return [self.dhdl, ] + # If only one column is present convert to series + assert len(self.dhdl.columns) == 1 + name = self.dhdl.columns[0] + return [self.dhdl[name], ] else: dhdl_list = [] # get the lambda names @@ -135,10 +137,14 @@ def separate_dhdl(self): # Make sure that the start point is set to true as well diff[:-1, :] = diff[:-1, :] | diff[1:, :] for i in range(len(l_types)): - new = self.dhdl.iloc[diff[:,i], i] - # drop all other index - for l in l_types: - if l != l_types[i]: - new = new.reset_index(l, drop=True) - dhdl_list.append(new) + if any(diff[:,i]) == False: + # Skip if not pertubed + pass + else: + new = self.dhdl.iloc[diff[:,i], i] + # drop all other index + for l in l_types: + if l != l_types[i]: + new = new.reset_index(l, drop=True) + dhdl_list.append(new) return dhdl_list diff --git a/src/alchemlyb/tests/test_workflow.py b/src/alchemlyb/tests/test_workflow.py index d3085f63..82105d27 100644 --- a/src/alchemlyb/tests/test_workflow.py +++ b/src/alchemlyb/tests/test_workflow.py @@ -4,29 +4,271 @@ import os from alchemlyb.workflows import ABFE -from alchemtest.gmx import load_ABFE, load_expanded_ensemble_case_1 - -# data = load_ABFE() -# dir = os.path.dirname(data['data']['complex'][0]) -# workflow = ABFE(dir=dir, T=310) -# workflow.preprocess() -# workflow.estimate() -# workflow.write() -# workflow.plot_overlap_matrix() -# workflow.plot_ti_dhdl() -# workflow.plot_dF_state() -# workflow.check_convergence(10) -# -# workflow = ABFE(dir=dir, T=310) - -def test_full_automatic(): - # Obtain the path of the data - dir = os.path.dirname(load_ABFE()['data']['complex'][0]) - workflow = ABFE(units='kcal/mol', software='Gromacs', dir=dir, - prefix='dhdl', suffix='xvg', T=310, skiptime=10, - uncorr='dhdl', threshold=50, - methods=('mbar', 'bar', 'ti'), out='./', - resultfilename='result.out', 
overlap='O_MBAR.pdf', - breakdown=True, forwrev=10, log='result.log') - print(workflow.convergence) -test_full_automatic() \ No newline at end of file +from alchemtest.gmx import load_ABFE, load_benzene + +class Test_automatic_ABFE(): + '''Test the full automatic workflow for load_ABFE from alchemtest.gmx for + three stage transformation.''' + + @staticmethod + @pytest.fixture(scope='class') + def workflow(): + dir = os.path.dirname(load_ABFE()['data']['complex'][0]) + workflow = ABFE(units='kcal/mol', software='Gromacs', dir=dir, + prefix='dhdl', suffix='xvg', T=310, skiptime=10, + uncorr='dhdl', threshold=50, + methods=('mbar', 'bar', 'ti'), out='./', + resultfilename='result.out', overlap='O_MBAR.pdf', + breakdown=True, forwrev=10, log='result.log') + return workflow + + def test_read(self, workflow): + '''test if the files has been loaded correctly.''' + assert len(workflow.u_nk_list) == 30 + assert len(workflow.dHdl_list) == 30 + assert all([len(u_nk) == 1001 for u_nk in workflow.u_nk_list]) + assert all([len(dHdl) == 1001 for dHdl in workflow.dHdl_list]) + + def test_subsample(self, workflow): + '''Test if the data has been shrinked by subsampling.''' + assert len(workflow.u_nk_sample_list) == 30 + assert len(workflow.dHdl_sample_list) == 30 + assert all([len(u_nk) < 1001 for u_nk in workflow.u_nk_sample_list]) + assert all([len(dHdl) < 1001 for dHdl in workflow.dHdl_sample_list]) + + def test_estomator(self, workflow): + '''Test if all three estimator has been used.''' + assert len(workflow.estimator) == 3 + assert 'mbar' in workflow.estimator + assert 'ti' in workflow.estimator + assert 'bar' in workflow.estimator + + def test_write(self, workflow): + '''test if the result has been written.''' + with open('result.out', 'r') as f: + text = f.read() + assert len(text.split('\n')) == 37 + os.remove('result.out') + + def test_O_MBAR(self, workflow): + '''test if the O_MBAR.pdf has been plotted.''' + assert os.path.isfile('O_MBAR.pdf') + os.remove('O_MBAR.pdf') 
+ + def test_dhdl_TI(self, workflow): + '''test if the dhdl_TI.pdf has been plotted.''' + assert os.path.isfile('dhdl_TI.pdf') + os.remove('dhdl_TI.pdf') + + def test_dF_state(self, workflow): + '''test if the dF_state.pdf has been plotted.''' + assert os.path.isfile('dF_state.pdf') + os.remove('dF_state.pdf') + assert os.path.isfile('dF_state_long.pdf') + os.remove('dF_state_long.pdf') + + def test_convergence(self, workflow): + '''test if the dF_state.pdf has been plotted.''' + assert os.path.isfile('dF_t.pdf') + os.remove('dF_t.pdf') + assert len(workflow.convergence) == 10 + +class Test_manual_ABFE(): + '''Test the manual workflow for load_ABFE from alchemtest.gmx for three + stage transformation.''' + + @staticmethod + @pytest.fixture(scope='class') + def workflow(): + dir = os.path.dirname(load_ABFE()['data']['complex'][0]) + workflow = ABFE(software='Gromacs', dir=dir, prefix='dhdl', + suffix='xvg', T=310) + workflow.update_units('kcal/mol') + workflow.preprocess(skiptime=10, uncorr='dhdl', threshold=50) + workflow.estimate(methods=('mbar', 'bar', 'ti')) + workflow.write(resultfilename='result.out') + workflow.plot_overlap_matrix(overlap='O_MBAR.pdf') + workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') + workflow.plot_dF_state(dF_state='dF_state.pdf') + workflow.check_convergence(10, dF_t='dF_t.pdf') + return workflow + + def test_read(self, workflow): + '''test if the files has been loaded correctly.''' + assert len(workflow.u_nk_list) == 30 + assert len(workflow.dHdl_list) == 30 + assert all([len(u_nk) == 1001 for u_nk in workflow.u_nk_list]) + assert all([len(dHdl) == 1001 for dHdl in workflow.dHdl_list]) + + def test_subsample(self, workflow): + '''Test if the data has been shrinked by subsampling.''' + assert len(workflow.u_nk_sample_list) == 30 + assert len(workflow.dHdl_sample_list) == 30 + assert all([len(u_nk) < 1001 for u_nk in workflow.u_nk_sample_list]) + assert all([len(dHdl) < 1001 for dHdl in workflow.dHdl_sample_list]) + + def test_estomator(self, 
workflow): + '''Test if all three estimator has been used.''' + assert len(workflow.estimator) == 3 + assert 'mbar' in workflow.estimator + assert 'ti' in workflow.estimator + assert 'bar' in workflow.estimator + + def test_write(self, workflow): + '''test if the result has been written.''' + with open('result.out', 'r') as f: + text = f.read() + assert len(text.split('\n')) == 37 + os.remove('result.out') + + def test_O_MBAR(self, workflow): + '''test if the O_MBAR.pdf has been plotted.''' + assert os.path.isfile('O_MBAR.pdf') + os.remove('O_MBAR.pdf') + + def test_dhdl_TI(self, workflow): + '''test if the dhdl_TI.pdf has been plotted.''' + assert os.path.isfile('dhdl_TI.pdf') + os.remove('dhdl_TI.pdf') + + def test_dF_state(self, workflow): + '''test if the dF_state.pdf has been plotted.''' + assert os.path.isfile('dF_state.pdf') + os.remove('dF_state.pdf') + + def test_convergence(self, workflow): + '''test if the dF_state.pdf has been plotted.''' + assert os.path.isfile('dF_t.pdf') + os.remove('dF_t.pdf') + assert len(workflow.convergence) == 10 + +class Test_automatic_benzene(): + '''Test the full automatic workflow for load_benzene from alchemtest.gmx for + single stage transformation.''' + + @staticmethod + @pytest.fixture(scope='class') + def workflow(): + dir = os.path.dirname(os.path.dirname( + load_benzene()['data']['Coulomb'][0])) + dir = os.path.join(dir, '*') + workflow = ABFE(units='kcal/mol', software='Gromacs', dir=dir, + prefix='dhdl', suffix='bz2', T=310, skiptime=0, + uncorr='dhdl', threshold=50, + methods=('mbar', 'bar', 'ti'), out='./', + resultfilename='result.out', overlap='O_MBAR.pdf', + breakdown=True, forwrev=10, log='result.log') + return workflow + + def test_read(self, workflow): + '''test if the files has been loaded correctly.''' + assert len(workflow.u_nk_list) == 5 + assert len(workflow.dHdl_list) == 5 + assert all([len(u_nk) == 4001 for u_nk in workflow.u_nk_list]) + assert all([len(dHdl) == 4001 for dHdl in workflow.dHdl_list]) + 
+ def test_subsample(self, workflow): + '''Test if the data has been shrinked by subsampling.''' + assert len(workflow.u_nk_sample_list) == 5 + assert len(workflow.dHdl_sample_list) == 5 + assert all([len(u_nk) < 4001 for u_nk in workflow.u_nk_sample_list]) + assert all([len(dHdl) < 4001 for dHdl in workflow.dHdl_sample_list]) + + def test_estomator(self, workflow): + '''Test if all three estimator has been used.''' + assert len(workflow.estimator) == 3 + assert 'mbar' in workflow.estimator + assert 'ti' in workflow.estimator + assert 'bar' in workflow.estimator + + def test_write(self, workflow): + '''test if the result has been written.''' + with open('result.out', 'r') as f: + text = f.read() + assert len(text.split('\n')) == 10 + os.remove('result.out') + + def test_O_MBAR(self, workflow): + '''test if the O_MBAR.pdf has been plotted.''' + assert os.path.isfile('O_MBAR.pdf') + os.remove('O_MBAR.pdf') + + def test_dhdl_TI(self, workflow): + '''test if the dhdl_TI.pdf has been plotted.''' + assert os.path.isfile('dhdl_TI.pdf') + os.remove('dhdl_TI.pdf') + + def test_dF_state(self, workflow): + '''test if the dF_state.pdf has been plotted.''' + assert os.path.isfile('dF_state.pdf') + os.remove('dF_state.pdf') + assert os.path.isfile('dF_state_long.pdf') + os.remove('dF_state_long.pdf') + + def test_convergence(self, workflow): + '''test if the dF_state.pdf has been plotted.''' + assert os.path.isfile('dF_t.pdf') + os.remove('dF_t.pdf') + assert len(workflow.convergence) == 10 + +class Test_unpertubed_lambda(): + '''Test the if two lamdas present and one of them is not pertubed. + + fep bound +time fep-lambda bound-lambda +0.0 0.5 0 12.958159 0 +10.0 0.5 0 -1.062968 0 +20.0 0.5 0 1.019020 0 +30.0 0.5 0 5.029051 0 +40.0 0.5 0 7.768072 0 + +Where only fep-lambda changes but the bonded-lambda is always 0. 
+ ''' + + @staticmethod + @pytest.fixture(scope='class') + def workflow(): + dir = os.path.dirname(os.path.dirname( + load_benzene()['data']['Coulomb'][0])) + dir = os.path.join(dir, '*') + workflow = ABFE(software='Gromacs', dir=dir, prefix='dhdl', + suffix='bz2', T=310) + # Block the n_uk + workflow.u_nk_list = [] + # Add another lambda column + for dHdl in workflow.dHdl_list: + dHdl.insert(1, 'bound-lambda', [1.0, ] * len(dHdl)) + dHdl.insert(1, 'bound', [1.0, ] * len(dHdl)) + dHdl.set_index('bound-lambda', append=True, inplace=True) + + workflow.estimate(methods=('ti', )) + workflow.write(resultfilename='result.out') + workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') + workflow.plot_dF_state(dF_state='dF_state.pdf') + workflow.check_convergence(10, dF_t='dF_t.pdf', estimator='ti') + return workflow + + def test_write(self, workflow): + '''test if the result has been written.''' + with open('result.out', 'r') as f: + text = f.read() + assert len(text.split('\n')) == 11 + os.remove('result.out') + + def test_dhdl_TI(self, workflow): + '''test if the dhdl_TI.pdf has been plotted.''' + assert os.path.isfile('dhdl_TI.pdf') + os.remove('dhdl_TI.pdf') + + def test_dF_state(self, workflow): + '''test if the dF_state.pdf has been plotted.''' + assert os.path.isfile('dF_state.pdf') + os.remove('dF_state.pdf') + + def test_convergence(self, workflow): + '''test if the dF_state.pdf has been plotted.''' + assert os.path.isfile('dF_t.pdf') + os.remove('dF_t.pdf') + assert len(workflow.convergence) == 10 + + diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 3bfd5d62..b1cdd889 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -267,8 +267,13 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): # Find the current column index # Select the first row and remove the first column (Time) key = u_nk.index.values[0][1:] - col = u_nk[key] - subsample = statistical_inefficiency(u_nk, u_nk[key]) + if len(key) 
> 1: + # Multiple keys + col = u_nk[key] + else: + # Single key + col = u_nk[key[0]] + subsample = statistical_inefficiency(u_nk, col) elif uncorr == 'dhdl_all': subsample = statistical_inefficiency(u_nk, u_nk.sum(axis=1)) elif uncorr == 'dE': @@ -335,21 +340,23 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): 'Start running estimator: {}.'.format(','.join(methods))) self.estimator = {} # Use unprocessed data if preprocess is not performed. - try: - dHdl = pd.concat(self.dHdl_sample_list) - except AttributeError: - dHdl = pd.concat(self.dHdl_list) - self.logger.warning('dHdl has not been preprocessed.') - self.logger.info( - 'A total {} lines of dHdl is used.'.format(len(dHdl))) + if 'ti' in methods: + try: + dHdl = pd.concat(self.dHdl_sample_list) + except (AttributeError, ValueError): + dHdl = pd.concat(self.dHdl_list) + self.logger.warning('dHdl has not been preprocessed.') + self.logger.info( + 'A total {} lines of dHdl is used.'.format(len(dHdl))) - try: - u_nk = pd.concat(self.u_nk_sample_list) - except AttributeError: - u_nk = pd.concat(self.u_nk_list) - self.logger.warning('u_nk has not been preprocessed.') - self.logger.info( - 'A total {} lines of u_nk is used.'.format(len(u_nk))) + if 'bar' in methods or 'mbar' in methods: + try: + u_nk = pd.concat(self.u_nk_sample_list) + except (AttributeError, ValueError): + u_nk = pd.concat(self.u_nk_list) + self.logger.warning('u_nk has not been preprocessed.') + self.logger.info( + 'A total {} lines of u_nk is used.'.format(len(u_nk))) for estimator in methods: if estimator.lower() == 'mbar' and len(u_nk) > 0: @@ -426,10 +433,25 @@ def write(self, resultfilename='result.out'): self.logger.info('write the staged result from estimator {}'.format( estimator_name)) for index, stage in enumerate(stages): - start = list(reversed( - [state[index] for state in estimator.states_])).index(0) - start = num_states - start - 1 - end = [state[index] for state in estimator.states_].index(1) + if len(stages) == 1: + start = 0 
+ end = len(estimator.states_) - 1 + else: + # Get the start and the end of the state + lambda_min = min( + [state[index] for state in estimator.states_]) + lambda_max = max( + [state[index] for state in estimator.states_]) + if lambda_min == lambda_max: + # Deal with the case where a certain lambda is used but + # not perturbed + start = 0 + end = 0 + else: + states = [state[index] for state in estimator.states_] + start = list(reversed(states)).index(lambda_min) + start = num_states - start - 1 + end = states.index(lambda_max) self.logger.info( 'Stage {} is from state {} to state {}.'.format( stage, start, end)) From f814bac14a4c9f28b0a440cf41ce7ba6e644ae67 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 22 Mar 2021 17:15:46 +0000 Subject: [PATCH 007/123] Update mbar_.py --- src/alchemlyb/estimators/mbar_.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py index 975d177b..4354a4b8 100644 --- a/src/alchemlyb/estimators/mbar_.py +++ b/src/alchemlyb/estimators/mbar_.py @@ -48,7 +48,9 @@ class MBAR(BaseEstimator): """ def __init__(self, maximum_iterations=10000, relative_tolerance=1.0e-7, - initial_f_k=None, method='hybr', verbose=False): + initial_f_k=None, method='adaptive', verbose=False): + # method='adaptive' is used as it is more stable + # https://github.com/choderalab/pymbar/issues/419 self.maximum_iterations = maximum_iterations self.relative_tolerance = relative_tolerance From e197f67501326df261703d5febb13e3ba793ac54 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 27 Mar 2021 10:38:58 +0000 Subject: [PATCH 008/123] cleanup --- src/alchemlyb/parsing/gmx.py | 2 - src/alchemlyb/preprocessing/subsampling.py | 56 +++------------------- src/alchemlyb/workflows/abfe.py | 7 +-- src/alchemlyb/workflows/base.py | 31 ------------ 4 files changed, 11 insertions(+), 85 deletions(-) delete mode 100644 src/alchemlyb/workflows/base.py diff --git a/src/alchemlyb/parsing/gmx.py 
b/src/alchemlyb/parsing/gmx.py index cc5a23ee..abc35168 100644 --- a/src/alchemlyb/parsing/gmx.py +++ b/src/alchemlyb/parsing/gmx.py @@ -104,7 +104,6 @@ def extract_u_nk(xvg, T): u_k = u_k.reset_index().set_index(newind) u_k.name = 'u_nk' - u_k.attrs['state'] = state return u_k @@ -182,7 +181,6 @@ def extract_dHdl(xvg, T): dHdl= dHdl.reset_index().set_index(newind) dHdl.name='dH/dl' - dHdl.attrs['state'] = state return dHdl diff --git a/src/alchemlyb/preprocessing/subsampling.py b/src/alchemlyb/preprocessing/subsampling.py index 21203d5b..2fe635b6 100644 --- a/src/alchemlyb/preprocessing/subsampling.py +++ b/src/alchemlyb/preprocessing/subsampling.py @@ -2,17 +2,13 @@ """ import numpy as np -import pandas as pd from pymbar.timeseries import (statisticalInefficiency, detectEquilibration, subsampleCorrelatedData, ) def _check_multiple_times(df): - if isinstance(df, pd.Series): - return df.sort_index(0).reset_index('time', name='').duplicated('time').any() - else: - return df.sort_index(0).reset_index('time').duplicated('time').any() + return df.sort_index(0).reset_index(0).duplicated('time').any() def _check_sorted(df): return df.reset_index(0)['time'].is_monotonic_increasing @@ -57,7 +53,7 @@ def slicing(df, lower=None, upper=None, step=None, force=False): def statistical_inefficiency(df, series=None, lower=None, upper=None, step=None, - conservative=True, drop_duplicates=True, sort=True): + conservative=True): """Subsample a DataFrame based on the calculated statistical inefficiency of a timeseries. @@ -82,10 +78,6 @@ def statistical_inefficiency(df, series=None, lower=None, upper=None, step=None, intervals (the default). ``False`` will sample at non-uniform intervals to closely match the (fractional) statistical_inefficieny, as implemented in :func:`pymbar.timeseries.subsampleCorrelatedData`. - drop_duplicates : bool - Drop the duplicated lines based on time. - sort : bool - Sort the Dataframe based on the time column. 
Returns ------- @@ -123,47 +115,13 @@ def statistical_inefficiency(df, series=None, lower=None, upper=None, step=None, """ if _check_multiple_times(df): - if drop_duplicates: - if isinstance(df, pd.Series): - # remove the duplicate based on time - drop_duplicates_series = df.reset_index('time', name='').\ - drop_duplicates('time') - # Rest the time index - lambda_names = drop_duplicates_series.index.names - df = drop_duplicates_series.set_index('time', append=True).\ - reorder_levels(['time', *lambda_names]) - else: - # remove the duplicate based on time - drop_duplicates_df = df.reset_index('time').drop_duplicates('time') - # Rest the time index - lambda_names = drop_duplicates_df.index.names - df = drop_duplicates_df.set_index('time', append=True).\ - reorder_levels(['time', *lambda_names]) - - # Do the same withing with the series - if series is not None: - # remove the duplicate based on time - drop_duplicates_series = series.reset_index('time', name='').\ - drop_duplicates('time') - # Rest the time index - lambda_names = drop_duplicates_series.index.names - series = drop_duplicates_series.set_index('time', append=True).\ - reorder_levels(['time', *lambda_names]) - - else: - raise KeyError("Duplicate time values found; statistical inefficiency " - "only works on a single, contiguous, " - "and sorted timeseries.") + raise KeyError("Duplicate time values found; statistical inefficiency " + "only works on a single, contiguous, " + "and sorted timeseries.") if not _check_sorted(df): - if sort: - df = df.sort_values('time') - - if series is not None: - series = series.sort_values('time') - else: - raise KeyError("Statistical inefficiency only works as expected if " - "values are sorted by time, increasing.") + raise KeyError("Statistical inefficiency only works as expected if " + "values are sorted by time, increasing.") if series is not None: series = slicing(series, lower=lower, upper=upper, step=step) diff --git a/src/alchemlyb/workflows/abfe.py 
b/src/alchemlyb/workflows/abfe.py index b1cdd889..ba65ff87 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -329,8 +329,9 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): Attributes ---------- - estimator : list - The list of estimators. + estimator : dict + The dictionary of estimators. The key for MBAR is 'mbar', for BAR is + 'bar' and for TI is 'ti'. ''' # Make estimators into a tuple if isinstance(methods, str): @@ -714,6 +715,6 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', np.array(forward_error_list) * self.scaling_factor, np.array(backward_list) * self.scaling_factor, np.array(backward_error_list) * self.scaling_factor, - units=self.units) + units=self.units, ax=ax) ax.figure.savefig(join(self.out, dF_t)) return ax diff --git a/src/alchemlyb/workflows/base.py b/src/alchemlyb/workflows/base.py deleted file mode 100644 index 9c9857e7..00000000 --- a/src/alchemlyb/workflows/base.py +++ /dev/null @@ -1,31 +0,0 @@ -import os -import logging - -class WorkflowBase(): - '''Base class for workflow creation. 
- ''' - def __init__(self, **kwargs): - - self.load_data(**kwargs) - self.sub_sampling(**kwargs) - self.sub_sampling(**kwargs) - self.compute(**kwargs) - self.plot(**kwargs) - self.write(**kwargs) - - def load_data(self, - return xvg_list - - def sub_sampling(self): - pass - - def compute(self): - pass - - def plot(self): - pass - - def write(self): - pass - - From 37a25033ac073fe45f6f02b78d527add10f86676 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 27 Mar 2021 11:08:57 +0000 Subject: [PATCH 009/123] remove dependency for scipy constant --- src/alchemlyb/constants.py | 5 +++++ src/alchemlyb/visualisation/convergence.py | 4 ++-- src/alchemlyb/workflows/abfe.py | 9 +++------ 3 files changed, 10 insertions(+), 8 deletions(-) create mode 100644 src/alchemlyb/constants.py diff --git a/src/alchemlyb/constants.py b/src/alchemlyb/constants.py new file mode 100644 index 00000000..830b49f7 --- /dev/null +++ b/src/alchemlyb/constants.py @@ -0,0 +1,5 @@ +'''Physical and mathematical constants and units.''' +# Taken from scipy.constants since py2 doesn't support it +k = 1.380649e-23 +N_A = 6.02214076e+23 +kJ2kcal = 0.239006 \ No newline at end of file diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index f03f28c9..a8c634c5 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -49,10 +49,10 @@ def plot_convergence(forward, forward_error, backward, backward_error, backward[-1] + backward_error[-1], color='#D2B9D3', zorder=1) line1 = ax.errorbar(f_ts, forward, yerr=forward_error, color='#736AFF', - lw=3, solid_capstyle='round', zorder=2, marker='o', + lw=3, zorder=2, marker='o', mfc='w', mew=2.5, mec='#736AFF', ms=12,) line2 = ax.errorbar(r_ts, backward, yerr=backward_error, color='#C11B17', - lw=3, solid_capstyle='round', zorder=3, marker='o', + lw=3, zorder=3, marker='o', mfc='w', mew=2.5, mec='#C11B17', ms=12, ) # ax.set_xlim(0,0.5) diff --git 
a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index ba65ff87..b17c3861 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -3,7 +3,6 @@ from glob import glob import pandas as pd import numpy as np -import scipy import logging from ..parsing import gmx, amber, namd, gomc @@ -11,6 +10,7 @@ from ..estimators import MBAR, BAR, TI from ..visualisation import (plot_mbar_overlap_matrix, plot_ti_dhdl, plot_dF_state, plot_convergence) +from ..constants import k, N_A, kJ2kcal class ABFE(): @@ -213,12 +213,9 @@ def update_units(self, units): if units == 'kBT': self.scaling_factor = 1 elif units == 'kJ/mol': - self.scaling_factor = scipy.constants.k * self.T * scipy.constants.N_A / \ - 1000 + self.scaling_factor = k * self.T * N_A / 1000 elif units == 'kcal/mol': - kJ2kcal = 0.239006 - self.scaling_factor = scipy.constants.k * self.T * scipy.constants.N_A / \ - 1000 * kJ2kcal + self.scaling_factor = k * self.T * N_A / 1000 * kJ2kcal else: raise NameError('{} is not a valid unit.'.format(units)) self.units = units From ba2dcc3f7f95217a5bde9e0aebfdb71077aca920 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 27 Mar 2021 12:13:00 +0000 Subject: [PATCH 010/123] dump test --- result.log | 75 ++++++++++++++++++++++ src/alchemlyb/tests/test_workflow.py | 43 +++++++++++++ src/alchemlyb/visualisation/convergence.py | 2 +- src/alchemlyb/workflows/abfe.py | 57 +++++----------- 4 files changed, 134 insertions(+), 43 deletions(-) create mode 100644 result.log diff --git a/result.log b/result.log new file mode 100644 index 00000000..e4f70dd2 --- /dev/null +++ b/result.log @@ -0,0 +1,75 @@ +INFO:alchemlyb.workflows.ABFE:Initialise Alchemlyb ABFE Workflow +INFO:alchemlyb.workflows.ABFE:Set temperature to 310 K. +INFO:alchemlyb.workflows.ABFE:Set unit to kcal/mol. 
+INFO:alchemlyb.workflows.ABFE:Finding files with prefix: dhdl, suffix: bz2 under directory /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/* produced by Gromacs +INFO:alchemlyb.workflows.ABFE:Found 5 xvg files. +INFO:alchemlyb.workflows.ABFE:Unsorted file list: +/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0750/dhdl.xvg.bz2 +/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0500/dhdl.xvg.bz2 +/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/1000/dhdl.xvg.bz2 +/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0000/dhdl.xvg.bz2 +/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0250/dhdl.xvg.bz2 +INFO:alchemlyb.workflows.ABFE:Using Gromacs parser to read the data. 
+INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of u_nk from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0750/dhdl.xvg.bz2 +INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of dhdl from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0750/dhdl.xvg.bz2 +INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of u_nk from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0500/dhdl.xvg.bz2 +INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of dhdl from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0500/dhdl.xvg.bz2 +INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of u_nk from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/1000/dhdl.xvg.bz2 +INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of dhdl from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/1000/dhdl.xvg.bz2 +INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of u_nk from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0000/dhdl.xvg.bz2 +INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of dhdl from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0000/dhdl.xvg.bz2 +INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of u_nk from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0250/dhdl.xvg.bz2 +INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of dhdl from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0250/dhdl.xvg.bz2 +INFO:alchemlyb.workflows.ABFE:Sort files according to the u_nk. 
+INFO:alchemlyb.workflows.ABFE:Sorted file list: +/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0000/dhdl.xvg.bz2 +/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0250/dhdl.xvg.bz2 +/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0500/dhdl.xvg.bz2 +/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0750/dhdl.xvg.bz2 +/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/1000/dhdl.xvg.bz2 +INFO:alchemlyb.workflows.ABFE:Start preprocessing with skiptime of 0 uncorrelation method of dhdl and threshold of 50 +INFO:alchemlyb.workflows.ABFE:Processing the u_nk data set with skiptime of 0. +WARNING:alchemlyb.workflows.ABFE:Number of u_nk 39 for state 0 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 0. +WARNING:alchemlyb.workflows.ABFE:Number of u_nk 20 for state 1 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 1. +WARNING:alchemlyb.workflows.ABFE:Number of u_nk 39 for state 2 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 2. +WARNING:alchemlyb.workflows.ABFE:Number of u_nk 20 for state 3 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 3. +WARNING:alchemlyb.workflows.ABFE:Number of u_nk 39 for state 4 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 4. +WARNING:alchemlyb.workflows.ABFE:Number of dHdl 13 for state 0 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 0. +WARNING:alchemlyb.workflows.ABFE:Number of dHdl 20 for state 1 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 1. 
+WARNING:alchemlyb.workflows.ABFE:Number of dHdl 39 for state 2 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 2. +WARNING:alchemlyb.workflows.ABFE:Number of dHdl 39 for state 3 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 3. +WARNING:alchemlyb.workflows.ABFE:Number of dHdl 20 for state 4 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 4. +INFO:alchemlyb.workflows.ABFE:Start preprocessing with skiptime of 0 uncorrelation method of dhdl_all and threshold of 50 +INFO:alchemlyb.workflows.ABFE:Processing the u_nk data set with skiptime of 0. +WARNING:alchemlyb.workflows.ABFE:Number of u_nk 13 for state 0 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 0. +WARNING:alchemlyb.workflows.ABFE:Number of u_nk 20 for state 1 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 1. +WARNING:alchemlyb.workflows.ABFE:Number of u_nk 39 for state 2 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 2. +WARNING:alchemlyb.workflows.ABFE:Number of u_nk 39 for state 3 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 3. +WARNING:alchemlyb.workflows.ABFE:Number of u_nk 20 for state 4 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 4. +WARNING:alchemlyb.workflows.ABFE:Number of dHdl 13 for state 0 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 0. +WARNING:alchemlyb.workflows.ABFE:Number of dHdl 20 for state 1 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 1. +WARNING:alchemlyb.workflows.ABFE:Number of dHdl 39 for state 2 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 2. 
+WARNING:alchemlyb.workflows.ABFE:Number of dHdl 39 for state 3 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 3. +WARNING:alchemlyb.workflows.ABFE:Number of dHdl 20 for state 4 is less than the threshold 50. +INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 4. +INFO:alchemlyb.workflows.ABFE:Start preprocessing with skiptime of 0 uncorrelation method of dE and threshold of 50 +INFO:alchemlyb.workflows.ABFE:Processing the u_nk data set with skiptime of 0. diff --git a/src/alchemlyb/tests/test_workflow.py b/src/alchemlyb/tests/test_workflow.py index 82105d27..57fac04e 100644 --- a/src/alchemlyb/tests/test_workflow.py +++ b/src/alchemlyb/tests/test_workflow.py @@ -5,6 +5,7 @@ from alchemlyb.workflows import ABFE from alchemtest.gmx import load_ABFE, load_benzene +from alchemtest.namd import load_tyr2ala class Test_automatic_ABFE(): '''Test the full automatic workflow for load_ABFE from alchemtest.gmx for @@ -271,4 +272,46 @@ def test_convergence(self, workflow): os.remove('dF_t.pdf') assert len(workflow.convergence) == 10 +class Test_methods(): + '''Test various methods.''' + @staticmethod + @pytest.fixture(scope='class') + def workflow(): + dir = os.path.dirname(os.path.dirname( + load_benzene()['data']['Coulomb'][0])) + dir = os.path.join(dir, '*') + workflow = ABFE(software='Gromacs', dir=dir, prefix='dhdl', + suffix='bz2', T=310) + return workflow + + def test_change_unit(self, workflow): + workflow.update_units('kBT') + assert workflow.scaling_factor == 1 + workflow.update_units('kcal/mol') + assert np.isclose(workflow.scaling_factor, 0.6, atol=0.1) + workflow.update_units('kJ/mol') + assert np.isclose(workflow.scaling_factor, 2.6, atol=0.1) + with pytest.raises(NameError): + workflow.update_units('aaa') + + def test_uncorr_threshold(self, workflow): + original_u_nk = workflow.u_nk_list + original_dHdl = workflow.dHdl_list + workflow.u_nk_list = [u_nk[:40] for u_nk in original_u_nk] + workflow.dHdl_list = 
[dHdl[:40] for dHdl in original_dHdl] + workflow.preprocess(threshold=50) + assert all([len(u_nk) == 40 for u_nk in workflow.u_nk_sample_list]) + assert all([len(dHdl) == 40 for dHdl in workflow.dHdl_sample_list]) + workflow.u_nk_list = original_u_nk + workflow.dHdl_list = original_dHdl + + def test_single_estimator(self, workflow): + workflow.estimate(methods='mbar') + assert len(workflow.estimator) == 1 + assert 'mbar' in workflow.estimator + + def test_bar_convergence(self, workflow): + workflow.check_convergence(10, estimator='bar') + assert os.path.isfile('dF_t.pdf') + os.remove('dF_t.pdf') diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index a8c634c5..3f85991f 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -32,7 +32,7 @@ def plot_convergence(forward, forward_error, backward, backward_error, The code is taken and modified from : `Alchemical Analysis `_ """ - if ax is None: + if ax is None: # pragma: no cover fig, ax = plt.subplots(figsize=(8, 6)) plt.setp(ax.spines['bottom'], color='#D2B9D3', lw=3, zorder=-2) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index b17c3861..d4784189 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -108,21 +108,6 @@ def __init__(self, units='kcal/mol', software='Gromacs', dir='./', software)) extract_u_nk = gmx.extract_u_nk extract_dHdl = gmx.extract_dHdl - elif software.lower() == 'amber': - self.logger.info('Using {} parser to read the data.'.format( - software)) - extract_u_nk = amber.extract_u_nk - extract_dHdl = amber.extract_dHdl - elif software.lower() == 'namd': - self.logger.info('Using {} parser to read the data.'.format( - software)) - extract_u_nk = namd.extract_u_nk - self.logger.warning('No dHdl reader available for NAMD.') - elif software.lower() == 'gomc': - self.logger.info('Using {} parser to read the data.'.format( - software)) - 
extract_u_nk = gomc.extract_u_nk - extract_dHdl = gomc.extract_dHdl else: raise NameError('{} parser not found.'.format(software)) @@ -134,7 +119,7 @@ def __init__(self, units='kcal/mol', software='Gromacs', dir='./', self.logger.info( 'Reading {} lines of u_nk from {}'.format(len(u_nk), xvg)) u_nk_list.append(u_nk) - except: + except: # pragma: no cover self.logger.warning( 'Error reading read u_nk from {}.'.format(xvg)) @@ -143,7 +128,7 @@ def __init__(self, units='kcal/mol', software='Gromacs', dir='./', self.logger.info( 'Reading {} lines of dhdl from {}'.format(len(dhdl), xvg)) dHdl_list.append(dhdl) - except: + except: # pragma: no cover self.logger.warning( 'Error reading read dhdl from {}.'.format(xvg)) @@ -259,7 +244,7 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): for index, u_nk in enumerate(self.u_nk_list): # Find the starting frame - u_nk = u_nk[u_nk.index.get_level_values('time')>skiptime] + u_nk = u_nk[u_nk.index.get_level_values('time') >= skiptime] if uncorr == 'dhdl': # Find the current column index # Select the first row and remove the first column (Time) @@ -271,21 +256,9 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): # Single key col = u_nk[key[0]] subsample = statistical_inefficiency(u_nk, col) - elif uncorr == 'dhdl_all': - subsample = statistical_inefficiency(u_nk, u_nk.sum(axis=1)) - elif uncorr == 'dE': - # Using the same logic as alchemical-analysis - key = u_nk.index.values[0][1:] - index = u_nk.columns.values.tolist().index(key) - # for the state that is not the last state, take the state+1 - if index + 1 < len(u_nk.columns): - subsample = statistical_inefficiency( - u_nk, u_nk.iloc[:, index + 1]) - # for the state that is the last state, take the state-1 - else: - subsample = statistical_inefficiency( - u_nk, u_nk.iloc[:, index - 1]) else: + # The dhdl_all and dE will be implemented here when #48 is + # merged raise NameError( 'Decorrelation method {} not found.'.format(uncorr)) @@ -294,26 +267,26 
@@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): 'less than the threshold {}.'.format( len(subsample), index, threshold)) self.logger.info('Take all the u_nk for state {}.'.format(index)) - self.u_nk_sample_list.append(subsample) + self.u_nk_sample_list.append(u_nk) else: self.logger.info('Take {} uncorrelated u_nk for state ' '{}.'.format(len(subsample), index)) - self.u_nk_sample_list.append(u_nk) + self.u_nk_sample_list.append(subsample) self.dHdl_sample_list = [] for index, dHdl in enumerate(self.dHdl_list): - dHdl = dHdl[dHdl.index.get_level_values('time') > skiptime] + dHdl = dHdl[dHdl.index.get_level_values('time') >= skiptime] subsample = statistical_inefficiency(dHdl, dHdl.sum(axis=1)) if len(subsample) < threshold: self.logger.warning('Number of dHdl {} for state {} is ' 'less than the threshold {}.'.format( len(subsample), index, threshold)) self.logger.info('Take all the dHdl for state {}.'.format(index)) - self.dHdl_sample_list.append(subsample) + self.dHdl_sample_list.append(dHdl) else: self.logger.info('Take {} uncorrelated dHdl for state ' '{}.'.format(len(subsample), index)) - self.dHdl_sample_list.append(dHdl) + self.dHdl_sample_list.append(subsample) def estimate(self, methods=('mbar', 'bar', 'ti')): '''Estimate the free energy using the selected estimator. @@ -405,7 +378,7 @@ def write(self, resultfilename='result.out'): dHdl = self.dHdl_list[0] stages = dHdl.reset_index('time').index.names self.logger.info('use the stage name from dHdl') - except: + except: # pragma: no cover stages = [] self.logger.warning('No stage name found in dHdl or u_nk') for stage in stages: @@ -503,7 +476,7 @@ def plot_overlap_matrix(self, overlap='O_MBAR.pdf', ax=None): self.logger.info('Plot overlap matrix to {} under {}.' ''.format(self.out, overlap)) return ax - else: + else: # pragma: no cover self.logger.warning('MBAR estimator not found. 
' 'Overlap matrix not plotted.') @@ -623,7 +596,7 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', dHdl_list = self.dHdl_list self.logger.info('Subsampled dHdl not available, ' 'use original data instead.') - except AttributeError: + except AttributeError: # pragma: no cover self.logger.warning('dHdl is not available.') try: @@ -634,7 +607,7 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', u_nk_list = self.u_nk_list self.logger.info('Subsampled u_nk not available, ' 'use original data instead.') - except AttributeError: + except AttributeError: # pragma: no cover self.logger.warning('u_nk is not available.') if estimator.lower() == 'mbar': @@ -646,7 +619,7 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', elif estimator.lower() == 'ti': self.logger.info('Use TI estimator for convergence analysis.') estimator_fit = TI().fit - else: + else: # pragma: no cover self.logger.warning( '{} is not a valid estimator.'.format(estimator)) From 94d7bb61f9e77367ca80a4aaaaac4d3ee14d74ec Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 27 Mar 2021 12:33:29 +0000 Subject: [PATCH 011/123] Update test_workflow.py --- src/alchemlyb/tests/test_workflow.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow.py b/src/alchemlyb/tests/test_workflow.py index 57fac04e..7b827c58 100644 --- a/src/alchemlyb/tests/test_workflow.py +++ b/src/alchemlyb/tests/test_workflow.py @@ -168,13 +168,6 @@ def test_read(self, workflow): assert all([len(u_nk) == 4001 for u_nk in workflow.u_nk_list]) assert all([len(dHdl) == 4001 for dHdl in workflow.dHdl_list]) - def test_subsample(self, workflow): - '''Test if the data has been shrinked by subsampling.''' - assert len(workflow.u_nk_sample_list) == 5 - assert len(workflow.dHdl_sample_list) == 5 - assert all([len(u_nk) < 4001 for u_nk in workflow.u_nk_sample_list]) - assert all([len(dHdl) < 4001 for dHdl in workflow.dHdl_sample_list]) - def 
test_estomator(self, workflow): '''Test if all three estimator has been used.''' assert len(workflow.estimator) == 3 From e4f6627731290236b0ae2a824e7fd72d3efac91f Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 27 Mar 2021 13:21:01 +0000 Subject: [PATCH 012/123] bump test --- src/alchemlyb/tests/test_workflow.py | 6 ++++++ src/alchemlyb/workflows/abfe.py | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow.py b/src/alchemlyb/tests/test_workflow.py index 7b827c58..7fc774f6 100644 --- a/src/alchemlyb/tests/test_workflow.py +++ b/src/alchemlyb/tests/test_workflow.py @@ -308,3 +308,9 @@ def test_bar_convergence(self, workflow): workflow.check_convergence(10, estimator='bar') assert os.path.isfile('dF_t.pdf') os.remove('dF_t.pdf') + + def test_unprocessed_n_uk(self, workflow): + workflow.u_nk_sample_list = [] + workflow.estimate() + assert len(workflow.estimator) == 3 + assert 'mbar' in workflow.estimator diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index d4784189..86922ca9 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -108,7 +108,7 @@ def __init__(self, units='kcal/mol', software='Gromacs', dir='./', software)) extract_u_nk = gmx.extract_u_nk extract_dHdl = gmx.extract_dHdl - else: + else: # pragma: no cover raise NameError('{} parser not found.'.format(software)) u_nk_list = [] @@ -256,7 +256,7 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): # Single key col = u_nk[key[0]] subsample = statistical_inefficiency(u_nk, col) - else: + else: # pragma: no cover # The dhdl_all and dE will be implemented here when #48 is # merged raise NameError( @@ -339,9 +339,9 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): elif estimator.lower() == 'ti' and len(dHdl) > 0: self.logger.info('Run TI estimator.') self.estimator['ti'] = TI().fit(dHdl) - elif estimator.lower() == 'mbar' or estimator.lower() == 'bar': + elif 
estimator.lower() == 'mbar' or estimator.lower() == 'bar': # pragma: no cover self.logger.warning('MBAR or BAR estimator require u_nk') - else: + else: # pragma: no cover self.logger.warning( '{} is not a valid estimator.'.format(estimator)) From 2e0aed0e6a1a7827de6075ec18fcf449fc04dc21 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 27 Mar 2021 13:56:49 +0000 Subject: [PATCH 013/123] cleanup --- src/alchemlyb/tests/test_ti_estimators.py | 5 +++++ src/alchemlyb/tests/test_workflow.py | 2 -- src/alchemlyb/workflows/abfe.py | 3 +-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/alchemlyb/tests/test_ti_estimators.py b/src/alchemlyb/tests/test_ti_estimators.py index 208e27a9..ae25aaf5 100644 --- a/src/alchemlyb/tests/test_ti_estimators.py +++ b/src/alchemlyb/tests/test_ti_estimators.py @@ -136,3 +136,8 @@ class TestTI(TIestimatorMixin): def X_delta_f(self, request): get_dHdl, E, dE = request.param return get_dHdl(), E, dE + +def test_TI_separate_dhdl(): + dHdl = gomc_benzene_dHdl() + estimator = TI().fit(dHdl) + assert all([isinstance(dhdl, pd.Series) for dhdl in estimator.separate_dhdl()]) \ No newline at end of file diff --git a/src/alchemlyb/tests/test_workflow.py b/src/alchemlyb/tests/test_workflow.py index 7fc774f6..3ca4c687 100644 --- a/src/alchemlyb/tests/test_workflow.py +++ b/src/alchemlyb/tests/test_workflow.py @@ -1,11 +1,9 @@ -import pandas as pd import numpy as np import pytest import os from alchemlyb.workflows import ABFE from alchemtest.gmx import load_ABFE, load_benzene -from alchemtest.namd import load_tyr2ala class Test_automatic_ABFE(): '''Test the full automatic workflow for load_ABFE from alchemtest.gmx for diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 86922ca9..427a5e30 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -1,11 +1,10 @@ -import os from os.path import join from glob import glob import pandas as pd import numpy as np import logging -from 
..parsing import gmx, amber, namd, gomc +from ..parsing import gmx from ..preprocessing.subsampling import statistical_inefficiency from ..estimators import MBAR, BAR, TI from ..visualisation import (plot_mbar_overlap_matrix, plot_ti_dhdl, From 5ab748e9f43bb639678e08a26404f27cd7d64fbd Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 27 Mar 2021 13:58:18 +0000 Subject: [PATCH 014/123] Delete result.log --- result.log | 75 ------------------------------------------------------ 1 file changed, 75 deletions(-) delete mode 100644 result.log diff --git a/result.log b/result.log deleted file mode 100644 index e4f70dd2..00000000 --- a/result.log +++ /dev/null @@ -1,75 +0,0 @@ -INFO:alchemlyb.workflows.ABFE:Initialise Alchemlyb ABFE Workflow -INFO:alchemlyb.workflows.ABFE:Set temperature to 310 K. -INFO:alchemlyb.workflows.ABFE:Set unit to kcal/mol. -INFO:alchemlyb.workflows.ABFE:Finding files with prefix: dhdl, suffix: bz2 under directory /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/* produced by Gromacs -INFO:alchemlyb.workflows.ABFE:Found 5 xvg files. -INFO:alchemlyb.workflows.ABFE:Unsorted file list: -/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0750/dhdl.xvg.bz2 -/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0500/dhdl.xvg.bz2 -/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/1000/dhdl.xvg.bz2 -/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0000/dhdl.xvg.bz2 -/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0250/dhdl.xvg.bz2 -INFO:alchemlyb.workflows.ABFE:Using Gromacs parser to read the data. 
-INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of u_nk from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0750/dhdl.xvg.bz2 -INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of dhdl from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0750/dhdl.xvg.bz2 -INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of u_nk from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0500/dhdl.xvg.bz2 -INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of dhdl from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0500/dhdl.xvg.bz2 -INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of u_nk from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/1000/dhdl.xvg.bz2 -INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of dhdl from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/1000/dhdl.xvg.bz2 -INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of u_nk from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0000/dhdl.xvg.bz2 -INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of dhdl from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0000/dhdl.xvg.bz2 -INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of u_nk from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0250/dhdl.xvg.bz2 -INFO:alchemlyb.workflows.ABFE:Reading 4001 lines of dhdl from /Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0250/dhdl.xvg.bz2 -INFO:alchemlyb.workflows.ABFE:Sort files according to the u_nk. 
-INFO:alchemlyb.workflows.ABFE:Sorted file list: -/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0000/dhdl.xvg.bz2 -/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0250/dhdl.xvg.bz2 -/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0500/dhdl.xvg.bz2 -/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/0750/dhdl.xvg.bz2 -/Users/zhiyiwu/miniforge3_x86/envs/mdanalysis-dev/lib/python3.7/site-packages/alchemtest/gmx/benzene/Coulomb/1000/dhdl.xvg.bz2 -INFO:alchemlyb.workflows.ABFE:Start preprocessing with skiptime of 0 uncorrelation method of dhdl and threshold of 50 -INFO:alchemlyb.workflows.ABFE:Processing the u_nk data set with skiptime of 0. -WARNING:alchemlyb.workflows.ABFE:Number of u_nk 39 for state 0 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 0. -WARNING:alchemlyb.workflows.ABFE:Number of u_nk 20 for state 1 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 1. -WARNING:alchemlyb.workflows.ABFE:Number of u_nk 39 for state 2 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 2. -WARNING:alchemlyb.workflows.ABFE:Number of u_nk 20 for state 3 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 3. -WARNING:alchemlyb.workflows.ABFE:Number of u_nk 39 for state 4 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 4. -WARNING:alchemlyb.workflows.ABFE:Number of dHdl 13 for state 0 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 0. -WARNING:alchemlyb.workflows.ABFE:Number of dHdl 20 for state 1 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 1. 
-WARNING:alchemlyb.workflows.ABFE:Number of dHdl 39 for state 2 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 2. -WARNING:alchemlyb.workflows.ABFE:Number of dHdl 39 for state 3 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 3. -WARNING:alchemlyb.workflows.ABFE:Number of dHdl 20 for state 4 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 4. -INFO:alchemlyb.workflows.ABFE:Start preprocessing with skiptime of 0 uncorrelation method of dhdl_all and threshold of 50 -INFO:alchemlyb.workflows.ABFE:Processing the u_nk data set with skiptime of 0. -WARNING:alchemlyb.workflows.ABFE:Number of u_nk 13 for state 0 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 0. -WARNING:alchemlyb.workflows.ABFE:Number of u_nk 20 for state 1 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 1. -WARNING:alchemlyb.workflows.ABFE:Number of u_nk 39 for state 2 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 2. -WARNING:alchemlyb.workflows.ABFE:Number of u_nk 39 for state 3 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 3. -WARNING:alchemlyb.workflows.ABFE:Number of u_nk 20 for state 4 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the u_nk for state 4. -WARNING:alchemlyb.workflows.ABFE:Number of dHdl 13 for state 0 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 0. -WARNING:alchemlyb.workflows.ABFE:Number of dHdl 20 for state 1 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 1. -WARNING:alchemlyb.workflows.ABFE:Number of dHdl 39 for state 2 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 2. 
-WARNING:alchemlyb.workflows.ABFE:Number of dHdl 39 for state 3 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 3. -WARNING:alchemlyb.workflows.ABFE:Number of dHdl 20 for state 4 is less than the threshold 50. -INFO:alchemlyb.workflows.ABFE:Take all the dHdl for state 4. -INFO:alchemlyb.workflows.ABFE:Start preprocessing with skiptime of 0 uncorrelation method of dE and threshold of 50 -INFO:alchemlyb.workflows.ABFE:Processing the u_nk data set with skiptime of 0. From ddcaa8d209b5bedd24d9e3bf3e6d921ada56c1ff Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 27 Mar 2021 14:00:18 +0000 Subject: [PATCH 015/123] cleanup --- src/alchemlyb/preprocessing/subsampling.py | 6 +++--- src/alchemlyb/visualisation/convergence.py | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/alchemlyb/preprocessing/subsampling.py b/src/alchemlyb/preprocessing/subsampling.py index a51c943e..e7fe88b8 100644 --- a/src/alchemlyb/preprocessing/subsampling.py +++ b/src/alchemlyb/preprocessing/subsampling.py @@ -120,12 +120,12 @@ def statistical_inefficiency(df, series=None, lower=None, upper=None, step=None, """ if _check_multiple_times(df): raise KeyError("Duplicate time values found; statistical inefficiency " - "only works on a single, contiguous, " - "and sorted timeseries.") + "only works on a single, contiguous, " + "and sorted timeseries.") if not _check_sorted(df): raise KeyError("Statistical inefficiency only works as expected if " - "values are sorted by time, increasing.") + "values are sorted by time, increasing.") if series is not None: series = slicing(series, lower=lower, upper=upper, step=step) diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index 3f85991f..57e74868 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -55,8 +55,6 @@ def plot_convergence(forward, forward_error, backward, backward_error, lw=3, 
zorder=3, marker='o', mfc='w', mew=2.5, mec='#C11B17', ms=12, ) - # ax.set_xlim(0,0.5) - plt.xticks(r_ts[::2], fontsize=10) plt.yticks(fontsize=10) From 90d033a312ff5217a9501d3bfff4348f03dc061d Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 27 Mar 2021 17:46:22 +0000 Subject: [PATCH 016/123] logging is a global variable --- src/alchemlyb/workflows/abfe.py | 136 ++++++++++++++++---------------- 1 file changed, 68 insertions(+), 68 deletions(-) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 427a5e30..9eb494cf 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -84,26 +84,26 @@ def __init__(self, units='kcal/mol', software='Gromacs', dir='./', log='result.log'): logging.basicConfig(filename=log, level=logging.INFO) - self.logger = logging.getLogger('alchemlyb.workflows.ABFE') - self.logger.info('Initialise Alchemlyb ABFE Workflow') + logger = logging.getLogger('alchemlyb.workflows.ABFE') + logger.info('Initialise Alchemlyb ABFE Workflow') - self.logger.info('Set temperature to {} K.'.format(T)) + logger.info('Set temperature to {} K.'.format(T)) self.T = T self.out = out self.update_units(units) - self.logger.info('Finding files with prefix: {}, suffix: {} under ' + logger.info('Finding files with prefix: {}, suffix: {} under ' 'directory {} produced by {}'.format(prefix, suffix, dir, software)) file_list = glob(join(dir, prefix + '*' + suffix)) - self.logger.info('Found {} xvg files.'.format(len(file_list))) - self.logger.info('Unsorted file list: \n{}'.format('\n'.join( + logger.info('Found {} xvg files.'.format(len(file_list))) + logger.info('Unsorted file list: \n{}'.format('\n'.join( file_list))) if software.lower() == 'gromacs': - self.logger.info('Using {} parser to read the data.'.format( + logger.info('Using {} parser to read the data.'.format( software)) extract_u_nk = gmx.extract_u_nk extract_dHdl = gmx.extract_dHdl @@ -115,31 +115,31 @@ def __init__(self, units='kcal/mol', 
software='Gromacs', dir='./', for xvg in file_list: try: u_nk = extract_u_nk(xvg, T=T) - self.logger.info( + logger.info( 'Reading {} lines of u_nk from {}'.format(len(u_nk), xvg)) u_nk_list.append(u_nk) except: # pragma: no cover - self.logger.warning( + logger.warning( 'Error reading read u_nk from {}.'.format(xvg)) try: dhdl = extract_dHdl(xvg, T=T) - self.logger.info( + logger.info( 'Reading {} lines of dhdl from {}'.format(len(dhdl), xvg)) dHdl_list.append(dhdl) except: # pragma: no cover - self.logger.warning( + logger.warning( 'Error reading read dhdl from {}.'.format(xvg)) # # Sort the files according to the state if len(u_nk_list) > 0: - self.logger.info('Sort files according to the u_nk.') + logger.info('Sort files according to the u_nk.') column_names = u_nk_list[0].columns.values.tolist() index_list = sorted(range(len(file_list)), key=lambda x:column_names.index( u_nk_list[x].reset_index('time').index.values[0])) else: - self.logger.info('Sort files according to the dHdl.') + logger.info('Sort files according to the dHdl.') column_names = sorted([dHdl.reset_index('time').index.values[0] for dHdl in dHdl_list]) index_list = sorted(range(len(file_list)), @@ -147,7 +147,7 @@ def __init__(self, units='kcal/mol', software='Gromacs', dir='./', dHdl_list[x].reset_index('time').index.values[0])) self.file_list = [file_list[i] for i in index_list] - self.logger.info('Sorted file list: \n{}'.format('\n'.join( + logger.info('Sorted file list: \n{}'.format('\n'.join( self.file_list))) self.u_nk_list = [u_nk_list[i] for i in index_list] self.dHdl_list = [dHdl_list[i] for i in index_list] @@ -193,7 +193,7 @@ def update_units(self, units): the unit when outputting text file or plotting the results. 
''' if units is not None: - self.logger.info('Set unit to {}.'.format(units)) + logger.info('Set unit to {}.'.format(units)) if units == 'kBT': self.scaling_factor = 1 elif units == 'kJ/mol': @@ -231,11 +231,11 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): dHdl_sample_list : list The list of dHdl after decorrelation. ''' - self.logger.info('Start preprocessing with skiptime of {} ' + logger.info('Start preprocessing with skiptime of {} ' 'uncorrelation method of {} and ' 'threshold of {}'.format(skiptime, uncorr, threshold)) if len(self.u_nk_list) > 0: - self.logger.info( + logger.info( 'Processing the u_nk data set with skiptime of {}.'.format( skiptime)) @@ -262,13 +262,13 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): 'Decorrelation method {} not found.'.format(uncorr)) if len(subsample) < threshold: - self.logger.warning('Number of u_nk {} for state {} is ' + logger.warning('Number of u_nk {} for state {} is ' 'less than the threshold {}.'.format( len(subsample), index, threshold)) - self.logger.info('Take all the u_nk for state {}.'.format(index)) + logger.info('Take all the u_nk for state {}.'.format(index)) self.u_nk_sample_list.append(u_nk) else: - self.logger.info('Take {} uncorrelated u_nk for state ' + logger.info('Take {} uncorrelated u_nk for state ' '{}.'.format(len(subsample), index)) self.u_nk_sample_list.append(subsample) @@ -277,13 +277,13 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): dHdl = dHdl[dHdl.index.get_level_values('time') >= skiptime] subsample = statistical_inefficiency(dHdl, dHdl.sum(axis=1)) if len(subsample) < threshold: - self.logger.warning('Number of dHdl {} for state {} is ' + logger.warning('Number of dHdl {} for state {} is ' 'less than the threshold {}.'.format( len(subsample), index, threshold)) - self.logger.info('Take all the dHdl for state {}.'.format(index)) + logger.info('Take all the dHdl for state {}.'.format(index)) self.dHdl_sample_list.append(dHdl) else: - 
self.logger.info('Take {} uncorrelated dHdl for state ' + logger.info('Take {} uncorrelated dHdl for state ' '{}.'.format(len(subsample), index)) self.dHdl_sample_list.append(subsample) @@ -306,7 +306,7 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): if isinstance(methods, str): methods = (methods, ) - self.logger.info( + logger.info( 'Start running estimator: {}.'.format(','.join(methods))) self.estimator = {} # Use unprocessed data if preprocess is not performed. @@ -315,8 +315,8 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): dHdl = pd.concat(self.dHdl_sample_list) except (AttributeError, ValueError): dHdl = pd.concat(self.dHdl_list) - self.logger.warning('dHdl has not been preprocessed.') - self.logger.info( + logger.warning('dHdl has not been preprocessed.') + logger.info( 'A total {} lines of dHdl is used.'.format(len(dHdl))) if 'bar' in methods or 'mbar' in methods: @@ -324,24 +324,24 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): u_nk = pd.concat(self.u_nk_sample_list) except (AttributeError, ValueError): u_nk = pd.concat(self.u_nk_list) - self.logger.warning('u_nk has not been preprocessed.') - self.logger.info( + logger.warning('u_nk has not been preprocessed.') + logger.info( 'A total {} lines of u_nk is used.'.format(len(u_nk))) for estimator in methods: if estimator.lower() == 'mbar' and len(u_nk) > 0: - self.logger.info('Run MBAR estimator.') + logger.info('Run MBAR estimator.') self.estimator['mbar'] = MBAR().fit(u_nk) elif estimator.lower() == 'bar' and len(u_nk) > 0: - self.logger.info('Run BAR estimator.') + logger.info('Run BAR estimator.') self.estimator['bar'] = BAR().fit(u_nk) elif estimator.lower() == 'ti' and len(dHdl) > 0: - self.logger.info('Run TI estimator.') + logger.info('Run TI estimator.') self.estimator['ti'] = TI().fit(dHdl) elif estimator.lower() == 'mbar' or estimator.lower() == 'bar': # pragma: no cover - self.logger.warning('MBAR or BAR estimator require u_nk') + logger.warning('MBAR or BAR estimator require 
u_nk') else: # pragma: no cover - self.logger.warning( + logger.warning( '{} is not a valid estimator.'.format(estimator)) def write(self, resultfilename='result.out'): @@ -355,11 +355,11 @@ def write(self, resultfilename='result.out'): ''' # Write estimate - self.logger.info('Write the estimate as txt file to {} under {} ' + logger.info('Write the estimate as txt file to {} under {} ' 'with unit {}.'.format( resultfilename, self.out, self.units)) # Make the header name - self.logger.info('Write the header names.') + logger.info('Write the header names.') result_out = [['------------', ], [' States ', ], ['------------', ],] @@ -371,21 +371,21 @@ def write(self, resultfilename='result.out'): try: u_nk = self.u_nk_list[0] stages = u_nk.reset_index('time').index.names - self.logger.info('use the stage name from u_nk') + logger.info('use the stage name from u_nk') except: try: dHdl = self.dHdl_list[0] stages = dHdl.reset_index('time').index.names - self.logger.info('use the stage name from dHdl') + logger.info('use the stage name from dHdl') except: # pragma: no cover stages = [] - self.logger.warning('No stage name found in dHdl or u_nk') + logger.warning('No stage name found in dHdl or u_nk') for stage in stages: result_out.append([stage.split('-')[0][:9].rjust(9)+': ', ]) result_out.append(['TOTAL'.rjust(9) + ': ', ]) for estimator_name, estimator in self.estimator.items(): - self.logger.info('write the result from estimator {}'.format( + logger.info('write the result from estimator {}'.format( estimator_name)) # Write the estimator header result_out[0].append('---------------------') @@ -400,7 +400,7 @@ def write(self, resultfilename='result.out'): result_out[2+num_states].append('---------------------') - self.logger.info('write the staged result from estimator {}'.format( + logger.info('write the staged result from estimator {}'.format( estimator_name)) for index, stage in enumerate(stages): if len(stages) == 1: @@ -422,7 +422,7 @@ def write(self, 
resultfilename='result.out'): start = list(reversed(states)).index(lambda_min) start = num_states - start - 1 end = states.index(lambda_max) - self.logger.info( + logger.info( 'Stage {} is from state {} to state {}.'.format( stage, start, end)) result = estimator.delta_f_.iloc[start, end]*self.scaling_factor @@ -445,7 +445,7 @@ def write(self, resultfilename='result.out'): for i in range(num_states - 1)])) * self.scaling_factor result_out[3 + num_states + len(stages)].append( '{:.3f} +- {:.3f}'.format(result, error, ).rjust(21)) - self.logger.info('Write results:\n'+ + logger.info('Write results:\n'+ '\n'.join([' '.join(line) for line in result_out])) with open(join(self.out, resultfilename), 'w') as f: f.write('\n'.join([' '.join(line) for line in result_out])) @@ -467,16 +467,16 @@ def plot_overlap_matrix(self, overlap='O_MBAR.pdf', ax=None): matplotlib.axes.Axes An axes with the overlap matrix drawn. ''' - self.logger.info('Plot overlap matrix.') + logger.info('Plot overlap matrix.') if 'mbar' in self.estimator: ax = plot_mbar_overlap_matrix(self.estimator['mbar'].overlap_matrix, ax=ax) ax.figure.savefig(join(self.out, overlap)) - self.logger.info('Plot overlap matrix to {} under {}.' + logger.info('Plot overlap matrix to {} under {}.' ''.format(self.out, overlap)) return ax else: # pragma: no cover - self.logger.warning('MBAR estimator not found. ' + logger.warning('MBAR estimator not found. ' 'Overlap matrix not plotted.') def plot_ti_dhdl(self, dhdl_TI='dhdl_TI.pdf', labels=None, colors=None, @@ -502,13 +502,13 @@ def plot_ti_dhdl(self, dhdl_TI='dhdl_TI.pdf', labels=None, colors=None, matplotlib.axes.Axes An axes with the TI dhdl drawn. ''' - self.logger.info('Plot TI dHdl.') + logger.info('Plot TI dHdl.') if 'ti' in self.estimator: ax = plot_ti_dhdl(self.estimator['ti'], units=self.units, labels=labels, colors=colors, ax=ax, scaling_factor=self.scaling_factor) ax.figure.savefig(join(self.out, dhdl_TI)) - self.logger.info('Plot TI dHdl to {} under {}.' 
+ logger.info('Plot TI dHdl to {} under {}.' ''.format(dhdl_TI, self.out)) def plot_dF_state(self, dF_state='dF_state.pdf', labels=None, colors=None, @@ -534,13 +534,13 @@ def plot_dF_state(self, dF_state='dF_state.pdf', labels=None, colors=None, matplotlib.figure.Figure An Figure with the dF states drawn. ''' - self.logger.info('Plot dF states.') + logger.info('Plot dF states.') fig = plot_dF_state(self.estimator.values(), labels=labels, colors=colors, units=self.units, scaling_factor=self.scaling_factor, orientation=orientation, nb=nb) fig.savefig(join(self.out, dF_state)) - self.logger.info('Plot dF state to {} under {}.' + logger.info('Plot dF state to {} under {}.' ''.format(dF_state, self.out)) def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', @@ -584,49 +584,49 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', matplotlib.axes.Axes An axes with the convergence drawn. ''' - self.logger.info('Start convergence analysis.') - self.logger.info('Check data availability.') + logger.info('Start convergence analysis.') + logger.info('Check data availability.') try: dHdl_list = self.dHdl_sample_list - self.logger.info('Subsampled dHdl is available.') + logger.info('Subsampled dHdl is available.') except AttributeError: try: dHdl_list = self.dHdl_list - self.logger.info('Subsampled dHdl not available, ' + logger.info('Subsampled dHdl not available, ' 'use original data instead.') except AttributeError: # pragma: no cover - self.logger.warning('dHdl is not available.') + logger.warning('dHdl is not available.') try: u_nk_list = self.u_nk_sample_list - self.logger.info('Subsampled u_nk is available.') + logger.info('Subsampled u_nk is available.') except AttributeError: try: u_nk_list = self.u_nk_list - self.logger.info('Subsampled u_nk not available, ' + logger.info('Subsampled u_nk not available, ' 'use original data instead.') except AttributeError: # pragma: no cover - self.logger.warning('u_nk is not available.') + 
logger.warning('u_nk is not available.') if estimator.lower() == 'mbar': - self.logger.info('Use MBAR estimator for convergence analysis.') + logger.info('Use MBAR estimator for convergence analysis.') estimator_fit = MBAR().fit elif estimator.lower() == 'bar': - self.logger.info('Use BAR estimator for convergence analysis.') + logger.info('Use BAR estimator for convergence analysis.') estimator_fit = BAR().fit elif estimator.lower() == 'ti': - self.logger.info('Use TI estimator for convergence analysis.') + logger.info('Use TI estimator for convergence analysis.') estimator_fit = TI().fit else: # pragma: no cover - self.logger.warning( + logger.warning( '{} is not a valid estimator.'.format(estimator)) - self.logger.info('Begin forward analysis') + logger.info('Begin forward analysis') forward_list = [] forward_error_list = [] for i in range(1, forwrev + 1): - self.logger.info('Forward analysis: {:.2f}%'.format(i / forwrev)) + logger.info('Forward analysis: {:.2f}%'.format(i / forwrev)) sample = [] if estimator.lower() in ['mbar', 'bar']: for data in u_nk_list: @@ -644,14 +644,14 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', forward_error_list.append(error) else: forward_error_list.append(result.d_delta_f_.iloc[0, -1]) - self.logger.info('{:.2f} +/- {:.2f} kBT'.format(forward_list[-1], + logger.info('{:.2f} +/- {:.2f} kBT'.format(forward_list[-1], forward_error_list[-1])) - self.logger.info('Begin backward analysis') + logger.info('Begin backward analysis') backward_list = [] backward_error_list = [] for i in range(1, forwrev + 1): - self.logger.info('Backward analysis: {:.2f}%'.format(i / forwrev)) + logger.info('Backward analysis: {:.2f}%'.format(i / forwrev)) sample = [] if estimator.lower() in ['mbar', 'bar']: for data in u_nk_list: @@ -669,7 +669,7 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', backward_error_list.append(error) else: backward_error_list.append(result.d_delta_f_.iloc[0, -1]) - 
self.logger.info('{:.2f} +/- {:.2f} kBT'.format(backward_list[-1], + logger.info('{:.2f} +/- {:.2f} kBT'.format(backward_list[-1], backward_error_list[-1])) convergence = pd.DataFrame({'Forward (kBT)': forward_list, @@ -678,7 +678,7 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', 'B. Error (kBT)': backward_error_list}) self.convergence = convergence - self.logger.info('Plot convergence analysis to {} under {}.' + logger.info('Plot convergence analysis to {} under {}.' ''.format(dF_t, self.out)) ax = plot_convergence(np.array(forward_list) * self.scaling_factor, np.array(forward_error_list) * self.scaling_factor, From c5a9b44f65c8496a9fefff98db002cc0e8e0cf1e Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 27 Mar 2021 17:53:21 +0000 Subject: [PATCH 017/123] Revert "logging is a global variable" This reverts commit 90d033a312ff5217a9501d3bfff4348f03dc061d. --- src/alchemlyb/workflows/abfe.py | 136 ++++++++++++++++---------------- 1 file changed, 68 insertions(+), 68 deletions(-) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 9eb494cf..427a5e30 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -84,26 +84,26 @@ def __init__(self, units='kcal/mol', software='Gromacs', dir='./', log='result.log'): logging.basicConfig(filename=log, level=logging.INFO) - logger = logging.getLogger('alchemlyb.workflows.ABFE') - logger.info('Initialise Alchemlyb ABFE Workflow') + self.logger = logging.getLogger('alchemlyb.workflows.ABFE') + self.logger.info('Initialise Alchemlyb ABFE Workflow') - logger.info('Set temperature to {} K.'.format(T)) + self.logger.info('Set temperature to {} K.'.format(T)) self.T = T self.out = out self.update_units(units) - logger.info('Finding files with prefix: {}, suffix: {} under ' + self.logger.info('Finding files with prefix: {}, suffix: {} under ' 'directory {} produced by {}'.format(prefix, suffix, dir, software)) file_list = glob(join(dir, prefix + '*' 
+ suffix)) - logger.info('Found {} xvg files.'.format(len(file_list))) - logger.info('Unsorted file list: \n{}'.format('\n'.join( + self.logger.info('Found {} xvg files.'.format(len(file_list))) + self.logger.info('Unsorted file list: \n{}'.format('\n'.join( file_list))) if software.lower() == 'gromacs': - logger.info('Using {} parser to read the data.'.format( + self.logger.info('Using {} parser to read the data.'.format( software)) extract_u_nk = gmx.extract_u_nk extract_dHdl = gmx.extract_dHdl @@ -115,31 +115,31 @@ def __init__(self, units='kcal/mol', software='Gromacs', dir='./', for xvg in file_list: try: u_nk = extract_u_nk(xvg, T=T) - logger.info( + self.logger.info( 'Reading {} lines of u_nk from {}'.format(len(u_nk), xvg)) u_nk_list.append(u_nk) except: # pragma: no cover - logger.warning( + self.logger.warning( 'Error reading read u_nk from {}.'.format(xvg)) try: dhdl = extract_dHdl(xvg, T=T) - logger.info( + self.logger.info( 'Reading {} lines of dhdl from {}'.format(len(dhdl), xvg)) dHdl_list.append(dhdl) except: # pragma: no cover - logger.warning( + self.logger.warning( 'Error reading read dhdl from {}.'.format(xvg)) # # Sort the files according to the state if len(u_nk_list) > 0: - logger.info('Sort files according to the u_nk.') + self.logger.info('Sort files according to the u_nk.') column_names = u_nk_list[0].columns.values.tolist() index_list = sorted(range(len(file_list)), key=lambda x:column_names.index( u_nk_list[x].reset_index('time').index.values[0])) else: - logger.info('Sort files according to the dHdl.') + self.logger.info('Sort files according to the dHdl.') column_names = sorted([dHdl.reset_index('time').index.values[0] for dHdl in dHdl_list]) index_list = sorted(range(len(file_list)), @@ -147,7 +147,7 @@ def __init__(self, units='kcal/mol', software='Gromacs', dir='./', dHdl_list[x].reset_index('time').index.values[0])) self.file_list = [file_list[i] for i in index_list] - logger.info('Sorted file list: \n{}'.format('\n'.join( + 
self.logger.info('Sorted file list: \n{}'.format('\n'.join( self.file_list))) self.u_nk_list = [u_nk_list[i] for i in index_list] self.dHdl_list = [dHdl_list[i] for i in index_list] @@ -193,7 +193,7 @@ def update_units(self, units): the unit when outputting text file or plotting the results. ''' if units is not None: - logger.info('Set unit to {}.'.format(units)) + self.logger.info('Set unit to {}.'.format(units)) if units == 'kBT': self.scaling_factor = 1 elif units == 'kJ/mol': @@ -231,11 +231,11 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): dHdl_sample_list : list The list of dHdl after decorrelation. ''' - logger.info('Start preprocessing with skiptime of {} ' + self.logger.info('Start preprocessing with skiptime of {} ' 'uncorrelation method of {} and ' 'threshold of {}'.format(skiptime, uncorr, threshold)) if len(self.u_nk_list) > 0: - logger.info( + self.logger.info( 'Processing the u_nk data set with skiptime of {}.'.format( skiptime)) @@ -262,13 +262,13 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): 'Decorrelation method {} not found.'.format(uncorr)) if len(subsample) < threshold: - logger.warning('Number of u_nk {} for state {} is ' + self.logger.warning('Number of u_nk {} for state {} is ' 'less than the threshold {}.'.format( len(subsample), index, threshold)) - logger.info('Take all the u_nk for state {}.'.format(index)) + self.logger.info('Take all the u_nk for state {}.'.format(index)) self.u_nk_sample_list.append(u_nk) else: - logger.info('Take {} uncorrelated u_nk for state ' + self.logger.info('Take {} uncorrelated u_nk for state ' '{}.'.format(len(subsample), index)) self.u_nk_sample_list.append(subsample) @@ -277,13 +277,13 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): dHdl = dHdl[dHdl.index.get_level_values('time') >= skiptime] subsample = statistical_inefficiency(dHdl, dHdl.sum(axis=1)) if len(subsample) < threshold: - logger.warning('Number of dHdl {} for state {} is ' + 
self.logger.warning('Number of dHdl {} for state {} is ' 'less than the threshold {}.'.format( len(subsample), index, threshold)) - logger.info('Take all the dHdl for state {}.'.format(index)) + self.logger.info('Take all the dHdl for state {}.'.format(index)) self.dHdl_sample_list.append(dHdl) else: - logger.info('Take {} uncorrelated dHdl for state ' + self.logger.info('Take {} uncorrelated dHdl for state ' '{}.'.format(len(subsample), index)) self.dHdl_sample_list.append(subsample) @@ -306,7 +306,7 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): if isinstance(methods, str): methods = (methods, ) - logger.info( + self.logger.info( 'Start running estimator: {}.'.format(','.join(methods))) self.estimator = {} # Use unprocessed data if preprocess is not performed. @@ -315,8 +315,8 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): dHdl = pd.concat(self.dHdl_sample_list) except (AttributeError, ValueError): dHdl = pd.concat(self.dHdl_list) - logger.warning('dHdl has not been preprocessed.') - logger.info( + self.logger.warning('dHdl has not been preprocessed.') + self.logger.info( 'A total {} lines of dHdl is used.'.format(len(dHdl))) if 'bar' in methods or 'mbar' in methods: @@ -324,24 +324,24 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): u_nk = pd.concat(self.u_nk_sample_list) except (AttributeError, ValueError): u_nk = pd.concat(self.u_nk_list) - logger.warning('u_nk has not been preprocessed.') - logger.info( + self.logger.warning('u_nk has not been preprocessed.') + self.logger.info( 'A total {} lines of u_nk is used.'.format(len(u_nk))) for estimator in methods: if estimator.lower() == 'mbar' and len(u_nk) > 0: - logger.info('Run MBAR estimator.') + self.logger.info('Run MBAR estimator.') self.estimator['mbar'] = MBAR().fit(u_nk) elif estimator.lower() == 'bar' and len(u_nk) > 0: - logger.info('Run BAR estimator.') + self.logger.info('Run BAR estimator.') self.estimator['bar'] = BAR().fit(u_nk) elif estimator.lower() == 'ti' and len(dHdl) > 0: 
- logger.info('Run TI estimator.') + self.logger.info('Run TI estimator.') self.estimator['ti'] = TI().fit(dHdl) elif estimator.lower() == 'mbar' or estimator.lower() == 'bar': # pragma: no cover - logger.warning('MBAR or BAR estimator require u_nk') + self.logger.warning('MBAR or BAR estimator require u_nk') else: # pragma: no cover - logger.warning( + self.logger.warning( '{} is not a valid estimator.'.format(estimator)) def write(self, resultfilename='result.out'): @@ -355,11 +355,11 @@ def write(self, resultfilename='result.out'): ''' # Write estimate - logger.info('Write the estimate as txt file to {} under {} ' + self.logger.info('Write the estimate as txt file to {} under {} ' 'with unit {}.'.format( resultfilename, self.out, self.units)) # Make the header name - logger.info('Write the header names.') + self.logger.info('Write the header names.') result_out = [['------------', ], [' States ', ], ['------------', ],] @@ -371,21 +371,21 @@ def write(self, resultfilename='result.out'): try: u_nk = self.u_nk_list[0] stages = u_nk.reset_index('time').index.names - logger.info('use the stage name from u_nk') + self.logger.info('use the stage name from u_nk') except: try: dHdl = self.dHdl_list[0] stages = dHdl.reset_index('time').index.names - logger.info('use the stage name from dHdl') + self.logger.info('use the stage name from dHdl') except: # pragma: no cover stages = [] - logger.warning('No stage name found in dHdl or u_nk') + self.logger.warning('No stage name found in dHdl or u_nk') for stage in stages: result_out.append([stage.split('-')[0][:9].rjust(9)+': ', ]) result_out.append(['TOTAL'.rjust(9) + ': ', ]) for estimator_name, estimator in self.estimator.items(): - logger.info('write the result from estimator {}'.format( + self.logger.info('write the result from estimator {}'.format( estimator_name)) # Write the estimator header result_out[0].append('---------------------') @@ -400,7 +400,7 @@ def write(self, resultfilename='result.out'): 
result_out[2+num_states].append('---------------------') - logger.info('write the staged result from estimator {}'.format( + self.logger.info('write the staged result from estimator {}'.format( estimator_name)) for index, stage in enumerate(stages): if len(stages) == 1: @@ -422,7 +422,7 @@ def write(self, resultfilename='result.out'): start = list(reversed(states)).index(lambda_min) start = num_states - start - 1 end = states.index(lambda_max) - logger.info( + self.logger.info( 'Stage {} is from state {} to state {}.'.format( stage, start, end)) result = estimator.delta_f_.iloc[start, end]*self.scaling_factor @@ -445,7 +445,7 @@ def write(self, resultfilename='result.out'): for i in range(num_states - 1)])) * self.scaling_factor result_out[3 + num_states + len(stages)].append( '{:.3f} +- {:.3f}'.format(result, error, ).rjust(21)) - logger.info('Write results:\n'+ + self.logger.info('Write results:\n'+ '\n'.join([' '.join(line) for line in result_out])) with open(join(self.out, resultfilename), 'w') as f: f.write('\n'.join([' '.join(line) for line in result_out])) @@ -467,16 +467,16 @@ def plot_overlap_matrix(self, overlap='O_MBAR.pdf', ax=None): matplotlib.axes.Axes An axes with the overlap matrix drawn. ''' - logger.info('Plot overlap matrix.') + self.logger.info('Plot overlap matrix.') if 'mbar' in self.estimator: ax = plot_mbar_overlap_matrix(self.estimator['mbar'].overlap_matrix, ax=ax) ax.figure.savefig(join(self.out, overlap)) - logger.info('Plot overlap matrix to {} under {}.' + self.logger.info('Plot overlap matrix to {} under {}.' ''.format(self.out, overlap)) return ax else: # pragma: no cover - logger.warning('MBAR estimator not found. ' + self.logger.warning('MBAR estimator not found. ' 'Overlap matrix not plotted.') def plot_ti_dhdl(self, dhdl_TI='dhdl_TI.pdf', labels=None, colors=None, @@ -502,13 +502,13 @@ def plot_ti_dhdl(self, dhdl_TI='dhdl_TI.pdf', labels=None, colors=None, matplotlib.axes.Axes An axes with the TI dhdl drawn. 
''' - logger.info('Plot TI dHdl.') + self.logger.info('Plot TI dHdl.') if 'ti' in self.estimator: ax = plot_ti_dhdl(self.estimator['ti'], units=self.units, labels=labels, colors=colors, ax=ax, scaling_factor=self.scaling_factor) ax.figure.savefig(join(self.out, dhdl_TI)) - logger.info('Plot TI dHdl to {} under {}.' + self.logger.info('Plot TI dHdl to {} under {}.' ''.format(dhdl_TI, self.out)) def plot_dF_state(self, dF_state='dF_state.pdf', labels=None, colors=None, @@ -534,13 +534,13 @@ def plot_dF_state(self, dF_state='dF_state.pdf', labels=None, colors=None, matplotlib.figure.Figure An Figure with the dF states drawn. ''' - logger.info('Plot dF states.') + self.logger.info('Plot dF states.') fig = plot_dF_state(self.estimator.values(), labels=labels, colors=colors, units=self.units, scaling_factor=self.scaling_factor, orientation=orientation, nb=nb) fig.savefig(join(self.out, dF_state)) - logger.info('Plot dF state to {} under {}.' + self.logger.info('Plot dF state to {} under {}.' ''.format(dF_state, self.out)) def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', @@ -584,49 +584,49 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', matplotlib.axes.Axes An axes with the convergence drawn. 
''' - logger.info('Start convergence analysis.') - logger.info('Check data availability.') + self.logger.info('Start convergence analysis.') + self.logger.info('Check data availability.') try: dHdl_list = self.dHdl_sample_list - logger.info('Subsampled dHdl is available.') + self.logger.info('Subsampled dHdl is available.') except AttributeError: try: dHdl_list = self.dHdl_list - logger.info('Subsampled dHdl not available, ' + self.logger.info('Subsampled dHdl not available, ' 'use original data instead.') except AttributeError: # pragma: no cover - logger.warning('dHdl is not available.') + self.logger.warning('dHdl is not available.') try: u_nk_list = self.u_nk_sample_list - logger.info('Subsampled u_nk is available.') + self.logger.info('Subsampled u_nk is available.') except AttributeError: try: u_nk_list = self.u_nk_list - logger.info('Subsampled u_nk not available, ' + self.logger.info('Subsampled u_nk not available, ' 'use original data instead.') except AttributeError: # pragma: no cover - logger.warning('u_nk is not available.') + self.logger.warning('u_nk is not available.') if estimator.lower() == 'mbar': - logger.info('Use MBAR estimator for convergence analysis.') + self.logger.info('Use MBAR estimator for convergence analysis.') estimator_fit = MBAR().fit elif estimator.lower() == 'bar': - logger.info('Use BAR estimator for convergence analysis.') + self.logger.info('Use BAR estimator for convergence analysis.') estimator_fit = BAR().fit elif estimator.lower() == 'ti': - logger.info('Use TI estimator for convergence analysis.') + self.logger.info('Use TI estimator for convergence analysis.') estimator_fit = TI().fit else: # pragma: no cover - logger.warning( + self.logger.warning( '{} is not a valid estimator.'.format(estimator)) - logger.info('Begin forward analysis') + self.logger.info('Begin forward analysis') forward_list = [] forward_error_list = [] for i in range(1, forwrev + 1): - logger.info('Forward analysis: {:.2f}%'.format(i / forwrev)) + 
self.logger.info('Forward analysis: {:.2f}%'.format(i / forwrev)) sample = [] if estimator.lower() in ['mbar', 'bar']: for data in u_nk_list: @@ -644,14 +644,14 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', forward_error_list.append(error) else: forward_error_list.append(result.d_delta_f_.iloc[0, -1]) - logger.info('{:.2f} +/- {:.2f} kBT'.format(forward_list[-1], + self.logger.info('{:.2f} +/- {:.2f} kBT'.format(forward_list[-1], forward_error_list[-1])) - logger.info('Begin backward analysis') + self.logger.info('Begin backward analysis') backward_list = [] backward_error_list = [] for i in range(1, forwrev + 1): - logger.info('Backward analysis: {:.2f}%'.format(i / forwrev)) + self.logger.info('Backward analysis: {:.2f}%'.format(i / forwrev)) sample = [] if estimator.lower() in ['mbar', 'bar']: for data in u_nk_list: @@ -669,7 +669,7 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', backward_error_list.append(error) else: backward_error_list.append(result.d_delta_f_.iloc[0, -1]) - logger.info('{:.2f} +/- {:.2f} kBT'.format(backward_list[-1], + self.logger.info('{:.2f} +/- {:.2f} kBT'.format(backward_list[-1], backward_error_list[-1])) convergence = pd.DataFrame({'Forward (kBT)': forward_list, @@ -678,7 +678,7 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', 'B. Error (kBT)': backward_error_list}) self.convergence = convergence - logger.info('Plot convergence analysis to {} under {}.' + self.logger.info('Plot convergence analysis to {} under {}.' 
''.format(dF_t, self.out)) ax = plot_convergence(np.array(forward_list) * self.scaling_factor, np.array(forward_error_list) * self.scaling_factor, From 26e83fb884bd34cba047df3ac16367751044dbf6 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 29 Mar 2021 22:17:19 +0100 Subject: [PATCH 018/123] move some part to other PR --- docs/images/dF_t.png | Bin 44948 -> 0 bytes docs/visualisation.rst | 49 ------------ ...chemlyb.visualisation.plot_convergence.rst | 19 ----- .../alchemlyb.visualisation.plot_dF_state.rst | 2 +- .../alchemlyb.visualisation.plot_ti_dhdl.rst | 2 +- src/alchemlyb/constants.py | 6 +- src/alchemlyb/estimators/ti_.py | 43 ----------- src/alchemlyb/preprocessing/subsampling.py | 1 + src/alchemlyb/tests/test_ti_estimators.py | 5 -- src/alchemlyb/visualisation/__init__.py | 3 +- src/alchemlyb/visualisation/convergence.py | 72 ------------------ src/alchemlyb/visualisation/dF_state.py | 5 +- src/alchemlyb/visualisation/ti_dhdl.py | 5 +- src/alchemlyb/workflows/abfe.py | 8 +- 14 files changed, 19 insertions(+), 201 deletions(-) delete mode 100644 docs/images/dF_t.png delete mode 100644 docs/visualisation/alchemlyb.visualisation.plot_convergence.rst delete mode 100644 src/alchemlyb/visualisation/convergence.py diff --git a/docs/images/dF_t.png b/docs/images/dF_t.png deleted file mode 100644 index 625e9a5a91dca985c6b1e88cafabb7d951d65dbb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 44948 zcmeFZWl&sg)GgRJ!6mr6TL>E5LxP6jfe1=H8U}j^@&CbpKl9lqaqob{Z zAP0xl|M3EL8+%g@EVY53;7y*{%4s=3AeaWwzc7WO1!fRP1*3# zTH}d&(JVK^lF)!kLk<4;PpJLRACGQev-RS?ym1RTn$uC^Js&4xZ1I{cl!jg=d?nI< zY^@Msu#tGx^bl!NVZ%@$wZzIr5KIorI*#T2zmsJ-k?Z{&ZCx}`LVnB3A9M5aYMmTH zLbShv{TPF;`x*vgA3>1}sYd(kpzR)VPKJy|@+=@|uFb0q8wba)M5BUN`zXmY{Z}xB z_vbsr168Bad^IzA@$uyNb>T&0#S8OlLqB`%6EMY5Gke18u_UAS?Um#MJ zmzRfp{c5>3InO_o_9{3GtRBckg==ffno?5gwsg`$`|?HbnZ;!_e<#3Z1rMT@TFSSh-*Dj!S(fgv^dZ3*|Y=nTHHa5f4tb8Fu9`; z@g&3vg$IAG7CkCvF8}^jZD?we1pj@s*ph7}n1<2he9*yC(d-vPBTLQ7iiUxKVe917 
z?F$2UeAsl9{o}`v?t^lxr^knet#ps&qa#dNS=sZ$1(G)j?17ZX`Rx_p2V@i!^S{H% zA%!iUFhM}zkxnx_3VE=Pu;j0JWq%2We^_7Hg=v*sLRXk5MD>oOOK{2&G6qljWZcS!?p=rue(%gV=bw#@ zR9sxxK2LW`F^P%DaqY40hqNp3EsU3XJ)K1PEbBkr5`Y`E=(>h(WNMnz&_HNx zY<#^QtCXqCc)XS2vwBd!#5*_TO7palpuc*&(s_M%-+7ek}hV?E z6WO0Xf1nFd>3L(%s@pK~vU#sBj)jqt5d!&KtG&O}M&WgP&UJ8faDWcEK%{(nIP%FF z+d2OGhGXSsD}xVJjMrxI>-xHpJc}?yDy~6v45vv09K*@KeexeFEQ8QYzSF5V11#@V=DU!vuYA zB0tLIsvh#{>*|>PjfRbQowpf8zE~=zBSo;$gk_(5~t^GFm{gp`W z?b{kn{WA5=sVM@m%b0I34=k#N4i}p2mb|VErpiC?yB58{M@nE=!MYP z0QUv?D@w}t^VXj~XU`X1X^svLBN@_Nvlk!O5GX-(O1ilTko(-*@xOZ2xv)ScEiH{z zUf~a->1&xzeMfgULX_}jV)t5nLc-eFuxJmsb=btjD5j>Sqn-4Ikg-||jZtp}v#T#~ zNVXv9fcxKk{`bv=UdoB-+RDn8<715Y+HWDfeu_Ns7Psd+*~3T8XBhEpIziXhuA{gD zIhB>1p68>o5Hv$awi}Qvr7|>1HNRwd-#&Zgyx+b*Q?0OJ-FK^-Q8LT4S0BXh2`Zz2?+^>_Z@+#$Y^K*@_8N6 zRFajSN2vjp&^&0CG9vLpf>b^DhMmS!tW6s%Pi3GVA^eKoqWOIqLK6OMC?pnFu?>!p zzronb&feM^^XTp>04D&i%&ptUk7hAZ{Qlx{fR>KVpzk|VVQFbsp-K*?&!gv8 zLavUNE^wTlM_2nZxV|^Txl9MVlV+9@Pg#<|5Zh}!Y$x(GcF1cvIWe%Ai;IhwL8(P2 z;iVK5oHg(9x!Fj-e*QcFY~IAaAT%Psi4u(*)8TZmFMrS3v_%+n27wx(a^6$S-lB&o z&LLJ%+k7dEJrk^EzAf;%NLgaHP7%Z-cOl=+YIpq&nEYso}Jkp%<@1YmOQ7({4O${?vZ?5jadviWDN|ccrB-W zfBbkc@3xhOAI#Ho@|y&Dy#UfwdEGg`i;}exy+4M7w2zLee8+ArI^exbANR_9uVZ3D ze|a!BwOthwBKS>_z*kB}W_A5fEF&{>$L+-)m+25Gh}Rxw@m`SlqiQXtK3yFy*xK51 z#wn!lOqyicXf z9A_e#x?4=7`aLg)<>-!$05X zG;2O%*I6%UADPq33=Iv1L_~B4q7o37Zjr2nZ`-MC3uMcd%*jewl^p(UTJkhNW+(pk z{c|CM;1FZK(_FG0th=RU>9$D|*YA|ggSq-}`UDbCzzrr#wLpt;wdB*bO+V*KLxGDs zwtSB%M(Rt5d)so{feKPTWWDLAwX-{%fXi`P8I6Q@VD*HV87X=>E7< zPC)@d`RTai_v}JW@Jf@<+2$aAb&<_t^UQO`#)gK(AP;P8Cr1#oMQ^a+t2eMy_WUtL z+I4JN0@-xxUNYTPXqU;Ts($KeuU(Ly1BdwW=5!<5D}wJ-!R^!rP3*;s7nO!R7*J#wORs=4!G%b<W&t64aB#*@;C4jkFT3Qe^rJS#lsCup| z2yZh)$f&5OdjI_S1>M-+zTpfcarZ=OC`nlzTqcRVer8)Y$vnRmi^dV!RXJ{L9O&B7 zsNz_9pk85K2J&O7s1Lc=*`^QSUK;=#t~+@Nl_rD4P+$(SG9w2E--51Ex)2d`L3BTK z+GJ3|$cXlL6h)L|FoprBocXUKl9H0--n_v*7;E$KIX_yW=!qoj1mJ80 z{5tP>Hkf~q;dM#$k{JE)=N>yeJba?wZ+34;gYy9~sB|LG^bz^$&{wrc%G1-+|BWJ! 
z$`(y5i~~8(xzRWsa!_0yD*}b2Ea9ca?_T;QLAO)d^Qk7=WD;LUdpJjH2#D2>r|W+J zj+J`<9?#Fu&vwqL-r(W(V)}`tx4b@HnQrmm1J{Mgz`y_k_I!I3&ZhYUaVTBbA{nVrnNjLYD0$2pv~tiB->${(I<<{iBep zg;K-07=yX#@A?*1YlZ9S*2p{4%=614)2PV2YJGjv=y;U#r1ATv@C<{Qy)5R>8E*oc|FDl~#bk`3mQ-YIq zM{4S9k`D#RaR1U?2PFUhkX%r@1XPLYT|dutt3@;?89~R5jc9>al6Zvh8Ab21j!&)k zHlZXO8T9-}7zMmZiAq|f4sF^bluHKd^zXMyj?=tZU`?OUt>F-B#8GwFX=&j#D)e|* zT%4U{0O?@exPjjca^msnsk#3oHnxgK4}jRcVdoPQ4cD`)7~|w2zA`!Dpeo|v;6z79`yRF4VL%&uWo2a@ z-h_~lS*jrzu(~*2PCP>*mmwR=lFWhgW2n)|J_jaVSLon5Wbm2JpN;oOUpWt)dk?1z z_xANksH@|nprAx2Bm_U*wLQT>0N=4Pp`mY8VuFLM^6EGAT;mUeobzyx%Jzu2< z5=uBgFN#o8n?C^##7S(!Sb>G+j!D2%8iX?D&$6)D zi%)GI!WT>0{Kx$&f9e0*Nl(wdr= z&7iX(?}?=TFd4%&TTu;8=a zZ}GVQ_o)WjVxQY{Goh0%+>pq~pzv^1HDd9zb%j81aB=s9t>xuWMDF&v zA?@Gj-hd0!cGAmK0X3W(fi8JZPU}55OgHU zisn-kke5M42(nx2dO~Cq=1|7<^#ov*-}9PNyrn#JF~J93l!KMfc&^zk3W{wMuZgR#30At7W^r%A!hsL9Nx5+ z^ZVRS_m6a+wtQ?x@O5j8w6Ua8+Lym`1rgdt9|AQ_qd9dwy{hhp+qr8TbTfoE9_yYF z=d0O>f9wc&20@ej z`V|HS0tk^cD9@NJ9j*)tIJvO<^7d#B=(Tal)+*W_JHRj4SXjOQcIAM4VzMRR^VnP@ z#KVJ^m6ZkYZ3Bz|lmd~7Bn@rxxJo{7xjr^PBNKeCTJ*d?DFc5{#ca^6LbuTmG;R=I ze}6a(GQj{5uUpM~q6OzUtLX+innlkuh33cGy?CE!r@%O8!>gXbr zvA-YZJYHcvu1K6ewA&nw1cM z4iow9sGSdHjqyE^{d@tBhT+~q4$5m%H;EMl${I8^y@GV6BIo>e7kdO85n%NO(q3si z_pG^W{@?q1yOv)_ACc7|VpCn|g_CvQR?)1|I0LHkTT^2SeJWp^6utlaAlYno5Q9~H z?d+T5&Gq#}s|5Kx*Q*7W6M&bsLHT@qfHw0GXvg1a7$vNJ)ClMv7>Hsm{SJxEJu1j~ zad8OXw8!ccQg{tO`)F%t_lv})*rxBZ4h)>!9t)t`|;UX^{!h4OmB_Z*xOKoy)%Fz4SJ)g0A2b|uC34)g-1YGfnGLX ze~d3Oo?)xVc9COI|IE5sv}f_mSCn*Oai{n(^%U@(kAPTY`+{-0szu8|?9{~)>D+y6 z>)3}qU@0x0!In>EWo2fcJTaeHY* z2JqjUoB|~!B?MHm_DHfGlM>XpLvA6*vEL*cc>6+8*kkm$6)O;>*EFxupmCLYYYC<$A6jgg*o>57{629nhk4eBI+A}bSSx)q4O4Z3?U7} z&Bt221MX^NU>_;7qLA|d_juW|;lgH?lfr}h(pQt%`+bUCp^B%c5Fq5G=Afh(B#2}O z)LAI`E$!ykXm%IJq5)+s0n3d6x}Z8&&o$L%K#92mib_RKFS6)eLGo#==HFznm7{+L z>3dyFXaEUDROIp6P`APM*GpZ6GIhhX-w}zN2C(DiJgcj#oIqzOd3+1NATd3)B~oI@ z$ZcXs^sHa&PhwLF4|o87l5M`rqrW+be;!VsO5&!F8jy&mFF|eN^RxtLqE674fJ8^( zeVt`i9!v+QR7o|p=MZS;m``Gv1LfACtY%i*BX_hMEwmW`*HK7d06GzqqyZSQim6V( 
z;Q{Izl!rvcG{L~c#6-l3th}v=u&|CshpkG-9aW*5_1FvYCqQua0IJ*iwp6tUFkD|d zIwan4zA);=vYf4X_Ti^v)$TA5nsN-F@(9Qe#HpCHKBwk@1g;Pp=348{o7d#78i0n@OgXMt^XiohCpu(CP!Uj+L#g z;1ruyB5=Qc10iG357XlQpXI*vt%2KzsS>&LRn}n?pqHx&23mt#d;qwm(DO^}4{r_4{Pf^74W!0%e1C0F_KsnFP&tnk~1P`Y0 zP3eeobK?S@M3kC_2J~Sk`_;qRrS%}k0jwjLr%EL#NKC?O$ystXW12xFBt!}XDX5AA z*elbr{(nUO-Isu${0rDyR=s9I*(mbv(HvPW>v{fxL@qe63-R|#Cd{vIZ&v|ihw?sb zlLCN|HW^9{)v7jG883R5$YX(j*tkVbD(Kj&wgm{U9zfZLYv_C7K_#Ky7#c38T{VFD zXVwKn1)F}TE(bp0w)M%+{d^BtP9t<2ajbwk>i{-4H@6ii`2s}IzaLw;41Z?=D)$<& zB+?t)rbAM%UxyYl7y~{M1a{YKtpyB3G6?M#(A6q|itsM2u#j;cfDUMNCb7!-zx11Z zBnt_wW6*xn#ztl4-+t392VYBLB~FMIjX+<2|B?F`?SOPjLmh?HQ%kqbmTP%+miGW^ zo7me?6Q-=)aTF}osmIvZ*a!&^w>YuGAQuXG9ZMhDN3WFr6`)!O1OP}-T1?bU|K`5V z7rYqVyBINZ0)N=7DWI~qitOJV@+t;eT1s|y3=rv191w&~f<;EQu(}2S&9C8MCACcu zy``nB5HTPdISPL8ciN5h5FijpaL}3Oh2E&_$ow$`%9xlKu&HPPvjZ(;fLB5CxZF2; zk3}6LCzXL9&dFjSpZ3#}#&ZRC6Ru1qe7%jmrojs=G{9cc1*l@*<>`L99AQ8RUw+1l zBo*+B61hbNiQ`GzQ3wE)>h!Ovsig0sf}^4^0T~ERfhJBKhKX|f)x}HFe}W}e@fA{+ z92R`x#4i331&BdFJ%#oG{x1yBw=kUWLSQ*wmQ-8K@xCMb=YSX6UHEd${9-?6bO_Y3nek$-f_?{RTd0HlMT15}G%qR+U4z0PSq&IpC` ziCl(=P?`onRw#mfL1qZ#IcNYPq7w%+H1K@xhy7G z0lD7+lntz?Mtqt-mNdY$4{pv76^zj-FAm_Ta&;Q&mjNGv;m>uLq zq^{dUq+CNofRCuNhf2nVK@SZc*_ZzR^V@HBeQTrxO^gvzyd|8(y-8|H4*u%5d=bpJy#? zTK>~EQym_EjpU48Kzu=CaFzQfR(Ta) zb;~lkwf*D;Q&F`>Jl|t6?<&QiPsQA?`%qh(0!SOFacbadm@R^`LXSWHoX3tqA-y;a4BZkI>BqoJ+_o0|VvU=RZMDx}@>o3i&N#e1LZ~HnOX1Tg zF@vrp8{kn2(^O4!r#z$oEc19vR9{}DbE?z`#&5eq|KBPAdR}=f9DFSa|GC9kSUrJy zcV@$y%SgnGP&bU@`@b(<+Pr!5CpAsssJ2@g%kifP0@c3_NB3H-%LIa3gOj0+-AuSC zL?eUO9Gc&>P(fQa%k=)E@*q+RIl`+tK8vS&vN@QR#nXS^9T~c&eZtm>pD|?r14H+n z>t(C{GTl|P;E8}nO*e>|TFF3Fgrb~ ztS@QX&I3%to(VYN8B&HU+*;hc0>#yNZl$c#)j;>XZ5C!*lS{Z`Qek|U>aBK<4k>WI zkp;<4>un@Aci(5HV?vN!ZF%Q!F)N+ULcu8~Lzk2Uw?;eH>z3=U$UP=}*`#GtPp=BZ zH`~iY!8p*vnFtoW=(#wj z9Q}I%%GC=W>W@Q9l%H!G&TmV4@11KKzUg6FsTj2MOvQf(&8%GMf-N+G;IEQmW%B~& zbJ4oUF@{f2WV(yuTyEn z!v99()3ty10Zg?C4OkTM&Gq?RhsMi;;PNPuNQ);O6J{0xPvoSfQiW;^dNCje3u50D%+w7}io6)s}t1=ub0gZ=h? 
z=+)83nIFa>QBksQ-+tA4*yvB70kkw|PN*Q1%*-fI90?dh+pRPgz$r90OWi@MzM`U{ z2~3DCyJ-cI_{fg!!P$n~!?G~Q_%qH|RFc>?D?PTz7*%gPsP&!*P+Sgt=^1*|oOd!R zIvmCsQxHYR8%|&J8_PddV_~Z>Ocl9{nUC!_YSvqm_4W5-6A+kI4FMUH3m7F-W`GzA zT{ATe4V`(@jlQ@B03KHMS;;GWRyq*i5fP~Y6b4UmuNcYyB+@$Kk@TUf*~m8f86Y|jgP`k_Mt0w%ptb*`&T{G|glw;m((#7N25x#C-6XLrZY?`{=Rr zcuCzmKQCoWp!bMW<23gcoQ{zpkXu!lWVPQLfUIXr;tad=~u(V`l*ZtgJD#;utL7yN_U;p&Z&b-BsY|hT{ zo^ajjaltaB*h1s{22_dWTftoB_arstiXQh5*eA)Z({X~rWJO0Ub24nu1WNKAAmUvd zco_5A+0{A#A1&wV6x8aR_SDgcUm}MICKdWzuOI=jR_J=!KO+5InK}&<(=!+t7&aUJ zTAiO^_PVYc(y2*jl*JAXgLQWc+_-S{2Yx-u{SHyHVNYIHonF(nDq6p%^t0EeNB^yq z3n9?d0)*H7xq8f^q9Qptx%QHZ*06;pmo-48Lmucbps8l{aSbEB{lnIl$=X!b}dM9+$PiNTcsp%7nS(gHnQ(r$2Y@EY*7v;(?m6!3yj zo(^i9QFwDqOvk|R56uiZx=$y~z{=DK{A0l&6zA=F$bdxwnmj(Iy&WI7+W`>p_Lm30 z+nu9YUVft06|pD`T>Vkd@OTQ_{(?7uP%zDTpif92GrO+XYyn06KYrtFnX#f%-%{Pi zAHNO&4Oy!-1Ndya`|A_Lt3ALg_|N1_mFZ07UaIt{*`2@+2|xHSzVe#2Oc@`=UUio*=y&9oy)^HP2lZ?;BJ~q+3`|3*@{J7C4}^y$Hg@q zpN69kxq(Be&CwAl$C=E+Q3m@^cpm})JPp44Q;*^H9bdWuK7U0jt)Vy2}ScU=B7^62WinAPqS%O zxuU2;-wj>f$L{aI8`LKRg><)X($H-^xjXGY9a=u7p|Q4Vff{|Wz=uAGJ8r&ye@xZC+m#>ZIcAogB3OfxZN1irZzpKr^_vz=ss_6+aWdOle}Wa6ee9MXD8aSzi8ZN4J9bI?p!;SK032hbjk&UhSWappRV(xW~tZWU0N& zB2P<<9tkpqFye zd|_%j=3!-r4WCx7B_5JFA)%vcdE$ETMgIL+>yM5Rm9bPxsRHi(DXcR5-eKxAddgU?)JsUlbiWW?-Uga$SC>?Q9KV?uRHe zIEu`-c1QM9Dh*$O=(aw6ym?iZ5~QC*RzY@rTE7zz7HK1eY*#*&G_Z2Y6gz?8^r=dvGh&J<&0ipy~2@`hi|=s z9r=v50j3iC*LTG}I}0gcP2ENeQxez$rIe4{Ipkj&iH} zER;fM9WI#1JVNCWfJvlm@%LoN0qXs!b5a+>i(1d*J(a7975;losyBQaa4Z(eTH{Zqqa(6U-Bq7I^68Y!1^a6;?sipw&v67 z*i(6&BOl!5x^C>JyiRyNQ<&`@s`P{2R}n_RL$XYCq6vb5cb z@X96Ac9?x^i@kp6->$*xnJGc$kyvMdMLXv?Bh#oY++t#`)Uv1Q;MNyDn5&`ko6<3u zJUTJE7%EDx$NEbT!)VoQ9%QB!Od4wAJTsrA<_F~0I^i`VcPXC25eQ6dS|-!35e!i< zV@I6#1JFF>1qdTDZ6G`)!D2z6f)gNBMn7VyRyrUJ4SA@k;g1Z^c^()~_a;N&InjiZ zc?h;Q^`At3Q^{wqeZ}#K|AQus1>8cP`B{s}a-W;9iv5Yz@x4!u>u_gamx_FHRV%tr zc2T+b{O3jIZ%9|e!12h6x)}<{#xsBv+8vokMDqpa8E8d;K)j?8h6cPL={Y^rA1_H= zl@gJ`HW4q7e7v;k)oqa_!fyUvj@r{mm@Df=Y 
z&H1L8!tU!Lvcyyc$f-TLQ)$&e<*aw;EZ{B*@d$nAm&Z5Ex1BcmHpvvOzbR39m{QsM zXwocZPNkfBj&FDjKOlP@psT%CgDb4_qS48cbs_pT#F$b$tl(dp=xo9BQvIzB&h-7g000bAhypF;?yN3`z9`WMT*}H`pA~Zmt?+kOkWzE&3(b7 zHdOqZMYX9NvW}R~u0iAW6!@FP`T0BYAd6uG<)eAz!IcHyNeN{t4KV>DRy9MLK;CQu zcCwaFS0=m(dYJq3qt%K<8?PWmYEj3TpQM5a+NY{Iu+FJhP*+UfkK1st|J>GejnRG0Ce~FM}`NGNby*ZID zql#8vl^TDcqtzItIlkBH7%i>7Q8l@Rf&N#;ea)(BEv`+4tjn)D@SyWv(A*7wO|mYk_KRZmhH&*S+&esol?jwJ<7)B+ABiIZxV(7t)m`)8_r zQFUg(w-w+MA396)#zA!cLopWdv`9iyTrZyO?cAAf?&MTx9WkE%6Nz8o{u%noMQK+E z%ub?^P`9N{Td(E3@Q#oSnj9Yjg|7iZsaXy?Rr{2uG4ND*R<0>CzMQx(1_YQn$F)XM zVcP3qJ8|!r*h1DiQ}*10%BxN}@aBWwGNn9DebxFb!PF<&eEZW- z4XwD-q@KrRz8b6IVUIfV;gRR})OdVU5xtI9}&$-~TNV&As)V-L=CxTt( zYRNt{vv69C)44Wkg!LiF*}mk0+$q6}7g+Qd&Vs8G?t9@G5`s@TIB;*}w`uiLt zoLj34OLL&1!0$j_tV9u{tooU`M6>$&X(CMY&xW}&GIqs@M8VY;ml@k(<7g*t93L*E zUMHa#A{PAmleHu8Cd8Un!VKGbfstID32g~ds5=K{T*tK>D|JYKigh@T$q>AYF>z-= z>*O3b=C2gm7x0%_t^9+@RbB4}tPJ|ku*a4kgLNDDf-6_T3(*?e9}SeC&Wu`WTsml%up98OkZ)Tg2Ft%L2W;$hM%4;@;ZOeG`s-x#g1jc=R7)TQHK zbsodoH!Lpqk^oT*MJH6|NV}Kb{KdtiG&Pm!vj-Vc)L+1vh_5n|tEyM+N+Q3HyVy6@ zp@PY4a2q#n;5U=n5NvsV7m2WTttG0c$N(Lk2WMl&S}}sXBe2Rf(c-c0G!1rRQ2{3D zk=t9f{NZ7ljbW>L{=kA1DzmJ%D7U{I0jRChZu|kSQNeDu959I;fmuF|aX7|oERepF zr^PYSyIJgGqc-xN#-Q3!9LSAKjWg**#KBD(#ClO||CcG@=#N&(Y~~l?=+7 z&55dvM;sn$?-)4n`p~{lKR3kLk4W2|aEhKQVh$obul*5?3ftHAhzW1ylv4YK86oI$ zuxmtQCt9vcbZ=D^>AsZ@HmPywVqXDd_cDVq^5{Z6D$VoFB}+GyY>WfPv_;G~)$rcq zp;2&u&-PGyh^kd?#t8CXc)2qdd+Vcu-{&fg#f)63=08kKd(pz1D-2F$ML!r>dhBne`(^mjf?NJ2oPEb%Rh8v!vCDglwAAtZ0|kdx7FVE|Lze ze2!)W3dZl-%D&i-F!uU&%$6HK?!C}nqq>k`NE^H!{MPJ8`D-OW44RnaGT!#)rsZga zfqYtKS4XEpQZ#p$rM5l7LfsajZKwmhy^1~_qs(kaIuV$&!Tu0xseD`JJsF{v+yjSzgK@000!lrF+hYF z_s3@`WrzUlq(87<054Ca+u0`K6`+|g5iv-Sk&uwULjE(u0i~jNXmLVkN{Wl+)sBD_ zLekZhpOlo8({@=5kbc9vQ?hBYc11WvJD@rjhIlgKrBy%^+bkSdiSRzhgm0OPRYein zwE59{T$}kN+ayVbb!-LgL~TbqL4K=~3~b|i@)9>;-O6p;Q=W6T#*E&Ndf$Cc$ZHb5 zX?vB|Me8hwc=h-lZ#~*pTQLmZt@M<|T<^JJ*`xALB!q0sO#Jsp5k=|f5crlnxuLF4 z;1T`^e8;Z0I|Wt1Y@i~{$HxaG^e{aP3?4q}C7&l@sN=BtcDIDzZWSJS1aL)Bv#|~4 
zPrV^a$hyig`SeAiTvAO+Z;q!|k<@e`Xq-b%P5tIfe*1KQJFXrf6n=Y(1t|w9Pn;eX z$$ukG-g1%P%Q#xS`=xba=0vvVgpbPY(7O@Rd5m$sXZkwmgI}x|y|$RXSKM;Im(@vc zRr9rw^Si#l^4Z@D>YozIn$k(<2Nle2r)TULdeTt*!)9{00(+)CDj$0}HBj#q8r* z#?x|)EcvhbMhEDtfR7jp49{rRStbpZ#|f5ydqJmK?iSML9=Ii$w}0>Q2`8_wl3Cqq zmszd!(WXiJhMlz#{++CDP<4(glU|u(n6Rvb?Wt~p2_4(>8tv~smxwPty!rlBwCTvx zozqN)j~Ji!Bb+G$wlCVtX{h8{S0_pzzE!vyOvPY9Wo|yg6a>l{4WIUWU~bzzj`;ik zs)tMorLnTGNPhmz2z8e0ST`a7mky`Z*yor0&ijcocJvC#0l>qdc@kDwE!VJ&nx%de zteY?CSU)6i)rli*q|Yd;gd1lzOv-bPQbV&y{yvd+7w4yIT#D5zr&d&Fif1ylb!Pfm zcuQV>yRk^p#{W&`Ds1u24->eW#2jlj0!+c|5iqRc#sNE_c*z7c$1o>pKCm-i z1Fvh9*%*E4hbDZe=}E+spIx^B0_pAV_g5^ac1p3IGJ>)V}AknK$mwDev5_=Qc-NI8O7k? zQf7k?9Mav%!{F;KE@J<{cU|vn8lE05g zo?B- z2fWR-+d60@7iiwsW=i41z$!Qj<}jeb4b+woHM9b6Q}3q_P+OB;7_c?i~yi-CHku!Y%Cgk{e^?{26dsp}QBce9XZ|47e5uyt&x zau@yj{UKPA>+|=*(%WP9&x+dV)f4fwIF!uU5Zs`Ai6X4w9%YO5X?IG}KSk*#cRO#w zvjm#w{vXDU_|1`W=XxodIPIYC+9i$BGJNEogwoxR7!{t3cYhZer8uxh(K}3F4eQNsI__4ER93)oj;dY&SL?-bXPwnRHa-z6*`Z3Vu@Mf}v!0CT*z`m9za z{Dy>>pr=`_o9KxH2OvW(gQ>1O6&0F);{4vYi`L zrs_4IeW7;-p<7g10e}CCMzj|KPuq*gL%PRb^a^kE6`3b;T&_qeu5x$_kkb9SX5Oo* zpA5>5Y^79$O_19g!80r_FyD}r66u?R07MDal(P- zRP6QZXHbtXR89ro)xk57BA$3 zdOZU0JN{I8RahHYsJbZe52Xf&afoupJqv6cX@1Jp`fvd1_~JFfcmEzSo=f%KCh4S! 
zDkMGvbZc@aqPs|^?%I8Az5zdSjUn^Qrs&|-)&feFzJSf?#pDA(&i_{WJlWXH@b+bC zq?2&*C_#;NNO5hP#-vbTjQp3v#lbmI!O4*(BVCo!%af2oV*nH~7aOsYX*P3S&-Pz2 z#tgznTfK^uq=sqLc|dWO1>24OD+Pptg0K00SDwG{x$9O$Taq6pM7ij>>s6WT@!ZkL zw9knBVPp`PFfjz9y1;D!9l8Kh{r>-1AXKrAJU+I)2X10lU=W4+i-GdK3gp7y;M+0G zir=ZJ1@w@6K!-fvEWB2~@v zE>>38!tzE&)k_U5W;4hbho4YHzFMaX5puNs#QF8B)z*n8qhaUp@N0B5dxLxJjnb`p zx$eyJB`_M!mRf?zOMdVB2Dgp9M_|re1*14NIa}VRe;7{IV>gwoLYXICREPdtkO*7e z*HohZNfM!m^xEq+573+zgn`;8v0#LqEZio^$nJX<`}AX$Ldi>-=)5s$WXo7GBLTrH zeWit+1NF9NY=LaDyci!Nm_m*gCksX*SIfOh2irkn7ZuI@nTs{Zn@`J3%z8cj0Fd;{) z52NKS$h`H3g~fsBe1dPD?BOe~FQQ6fuY1V@35Bvh^B;ssr$lCR4u!GT$iB<1PoK{3 z|C#!&IWnDTMKPO)D=w#~%0zmIzD%g-@XEJ1hK63Ql4eTsiBGpu|EV)$*(3IsU5pv8 zmEe7jHAhfQt6`qh)m2WdVI&b1d(GX0Z?D|HOVTvFqu?R4y|~|_yR2~)De?p8^+O&$ z8h3N;pjvR%7@amcNBf=V^_6Vn!xUuGuNzPE(}tt$!5<#?KKzsOA7j37qi@9{*2I#Q zotxa+O%04`8-ejYsJq~!r;x9h!^z8EXjVo=ad#;^Y{=vl7%Hj-)K9mx$6~ZV}doD(Uy$8K7U)x>hkW@Up zUsh?NJD`=)*{M633fCa>3LF2!gJ^AMYd+kc+cUACP?OEbilkpI6i8Hb8hT!~?T_VU zZ(~IEx;t;Ah%Q`wX8B6A3gWA%$Ium=2G{6NF~$VZFuB!DcCzV+Yn*sEAxtMyUTC+U zd2*xEBCH)IQUeWYQtD|NkL-F0-5rG5#0=4fY0RgsPo-4%D2!Kh@^e4x<8e`j8_s!E z(aUONJt-~xT$FlMlsaPTAop!3Jlz;uviczQ_0<{8XISEhsaQ_zhgU0)hdh6Om6obL zS=v|EE1U7Vu6>{2=3}3Uaog!x9nDKzX?_-Sk5Dt*b)Q+0P^p!biIxQEyjOjQIUD&~ z3DLFvxq9?jo)k}=%zlOI_V+O9ALMiAJ`lg-DyEnl&X8(_C-Lav*nOO8Qc;*ZvI}!8 zo+>xG@Y1t%eR`DV6^jkyn!(kwzG`#4h^*uye1~}ogx5bUGKj987X)`*agm-PxT|xr z1_xFg?{bCRkhF7sJ3yRM!4Bl2Qr;B~>;~y}xYQaxHGQ|BaU?E}D?e zK>1ie=6}?Eb>)#w?bhwJ@#r3lks7i{Db9!9MH!7Qz_ZLQWYwNC**V3}wzv&=)d0PTJ8JGOivLa{J zm}t~=9zVTyUyq%+sfu8E*Vat*F`cZ8CE+V=xnL18`ztuF#pE<_^DI45D^4Pwjbti7 z1crUhoupnO9_KPQFMSur4N4x zOizmtEv6Vqcprw{Dp5)6kMY5NQP-?`;zcO z^=+J&5%REvo11u4g4iryE|k-9ccB|>L5n8U&tju-Ny^J<{mj#e_kq7I{S!*DO+N9dH&~E(Q3hT9 z)(W^~cG#KWB=Ly-LU-*(^dpsZicd-TJk_N#A=u8f$<|fp&syBhXy4fVFnjW(ZCulr zV}e*NZ>KIiR*Rl*TJvpMK8beks?w0Ew%H#G#zqbw+K3mnQE#@oPHt3xBx%Uoi__6h zJahVRWPY`<@~d^q@-a}CT$bXltIzUor{oq z&P^MEYSnqOalb>lwnZ}O%bW(|1McY+0Pz4MY 
zYe4oO>BI$9>RlzNkv=NjIw5A(Yg+gd?LMztWLa5BYl@^$Fp0LP{Q@A7uE#wS0@>M_>zIc8NOU z;PN@efr%I&YB8jHQ%J;e~ei>v)nE{ymty3WNX{LeK6;ftbP3db*t&d+Te z>N#JLwpFNjS+(go?PI^qDQaF2=~cgtKg54!Tng)rXw&uOD0U2V^(^XXiKakoPOpMj zX~Pb(80=SNUNQWT<@orh+WO!lKP&t17q{89r>C-LZ7xhx*) zvxD|0TiaG=LC$@zF}COWXl6&|OB_td!|mf$_TTCVmeEa* zH4*6f7Vh;V5yg5{3{mgtdGlBE2ciMI%SFkfNqc@G1OM;sZ`5~>3B%;VC-xnT(3EAD zO6&s_FZea)qm36G%FC!8S6nGfo=r}?M^VV(tyL?k;3mR0y&4^o;)R9j8Cbd>^jpTM zVC>d-6)UmJ-bQ!;A{P9{bF%8^n5GCn5X5_7z7Eyd=I9rgDEUg``cGPAZkwgdAtiXVbx+aY>!0mw8dmt z!nzu$G>-k*=Z5f}jCVwa<$)o*ie~)zch)kqWvs=sOgsGp%*EvF*per9g|j`LTuJ`g zsmdYYwJ_yig1hNs(?}$s?AdMQ9V4GCH9mRrEs0PdQ;&VHAXM>I(yPaG6#*pl>5pYP|{OtSAH zJ5%!$6&MKEI_z{!+->KvGEbZ)M~sHsl76}& z-}%$-VSeY5CGd01r@?BUGP0CK`ao61!5wf6oXE6RMO zBF=89fZ4$>pIGFB4BvvLsWa1}4sfOoW^eUT3-GqFyowd@+b)l8l+;c?iCLZN z-6??bkQ)xCGJ1Q14J>R0jfY|z^=ko>zeWZM#IthC)lN@h^PG+cIFnRcwwrd2zubQl zRvjZ_L@X4K_G)F%0}=xzAmoV+N9w%jwBqfyMw!hMW<`8I{3Q)+rF3PlB4!ajad#FDpv)WhFplg9dsHHOBcjr_xRFI<_z z5#IW31b@;fo&7=tZ0j+!xyHSO6ppaMXnDu4WGz!;uBpHOL+!k^Yc)(*0}09JByK$R zOS1c|zL}4AZ^yk78}-R4NtDXC(%}=j5svr~__tb4%u-Is)Q-I!Cx8%@V2Hor0H+#d zi7TJXg=5w+P`f-kG>Ae~i<>#CWHOa7;);=e1KsLPs~?F;eG-X@mY6a&T#XX)R#%g( zUVbw)z|wcVZI{)WIqs<=d}N}Oe`BCEn5E`~lcQ%8_s@R#?*N4aaUC5dk4ZN&+r@JX z!#pPx5OQSGy6_$%@xZ}|Ks9vV#`{3g*OkygU-vQU8Xw!Vo~1xipMiqv?|WK<-A|l!lCL>eF(lyRR=07|4XM zaoOlW1nd2Wx*YR>)6#}E1u~vD9R@xo)BLmdk*_=#>0r0!!Kp+=Kt5B1G;j-JMvQPM z_`}ubvV)ADc=A)j;#T}N==|KKdL>UjN}AxV`!7uKEvbK^n3R}D%zF{2;>Kb+YluO< zoGhj~?}(aeym(U$3TuY<%c%m%Sw^4+{_RKlC0p*K_Es-Z=7#Q1A3veETU~woG3D0f z{hPfpddv_jys8=bFn#IP&1#t(S!9l&hrZa_w62-@0FoE02QQ9Jixdcm$v-$wJh?6_ z%T#l`!t{ZX+Pv!)fsUUtXMObbnF&14L@v8+tE%=hnZ>=jNBHlrORIc|jbV`L;lx$O zyZ^FlfluL|f{8D;QfU4RRUzi4YGMw-#nYiIuyGE^f>FtEr4DFl6js|Gy_tDh8K|X0vjDw5e!MWwov2z|+T<*n8^{zHkl>V{|RA(~UTxN|Jd^ zh!V>&iWTgxl(h9dqh9~0EzQjl#*?w$WE$8ajaVXVYC3ycixyFhb${am~TwR)whvKw~i`u<05pM)GvPMXXbiw@ZYFdvok3yOzLRk+KZWc zDXtrb{;vCN>y}oFy}EsHZR)!BI7S*Pk&WAVlbChamDjV2h8I4d94st6RasytGuCb5 z${*0e3Tbw6=QO96p*3-*J&c6ql$|OU*NM$V{frM^&jjPlwWx-aH{Go-jb5D?{p2ej 
z07W-?6=q*vzq!brZhiClc+!~r;ky~)Z$EwCKPleJ>hHr)E=uq>TM@||ISPK(@+4kt zPg3U|Ypr2OcAi-Mb|gj;rC^%o@YLx0qEr(8{Z7fm1VMqsnc%PO6PdkVE~D8vZ0o!~ z>A%^Dqe-b;OnzmO+%#=@()%1dCZD@iM>k$avc7fI;i};-wr*lhzm100zHEdHu0$9@|EUv7*b=XGcG=a8 z9Swn~jGyH*@0Y6;h9AhGH*9|9wTWM9>nO_jbY9Y$Dd()4QU{UyeQGntdXdJFeAV*R_hmGI2#;G4}NPj z6?Tu&(>8kSeH(#--JoUMfnISuR2%xnDQHPx8a>Y};P)?@&d{?fxcptx^PD+~t<7dP zuy$1;9h^CGCPHca+n>v-N@;Mnls7^>R5ZLia3eAx$TUH&YRy#dn=hZ)VAMke#{JJDOr~YBQgi$<@DSAPwk(yDm6k{>}-Y+dBmchzM36V0S5FWYg zpYSYC??Kj$-k18^xgieLmCxuM85lPVRB@^BUXG!)NLBDY9{S_UBu6YTe(1jExnaO^ zz?)z$Co)49H=)B#S80V8NlQl3o2k*RreiPSb=S3^j9Rbd4z8yKt31==Ao^S7FM{BZ|52zI4l8;UyYa(st}JPOA5MZS|}dd^ihtaa1IjPRz<84ZK|6 zppL!2F0WGN~~2)rCK;!)EqU^?-E%ul zT?G6A0l8K6Gi{~hz!i*RpU@-`cgzTi8Xu1Jk2+N2P12vMFHz@uu1qYzp zXF`+kQvHmlKk~q&k3%BLfuJHMDyb`L`Qb-YlffcmHtci!_d9CkR^;GKaGkpzvtWdJ zKc8Ze8^5n;Uo7-69+7I?qT%d^ifVCa*Kkwe!K;OenT)*ftwVO%()nLMqEo z(fFL;4#%f-l%Xs5N%Q5(6&ErO47jH-K7Sk7xtHE2cy+6=WM5~Gc^R9p`PIt-J%gtFO3VcOZZ`zqgB~??KEcJ3ou&|qL-wWokabziG3ahi08i+}znUT5Pogjv@ ze%yy&qyK(qD!B!>oLJjEBUyU#EFnDo=N$n3G37g;P1wiy?2vPtZRkHf05`d-@oLGs#z%V|}dScLO$+MK)* z?X<{gvq-l5mqXamg)(@NiH_tj%N;Ikb2~q%NH#~sP@`NYaUfBIYNC_>sV%uE8oind zIYOwgX!5xa8~XAH>o3wQ`$2L}1B+^90UP%^tG93V`eM1F6sh9qK zCg!+L(A2!a%b@_?ybfc-gPxqQOiG*}zNMQq2Ddf`NZZP;A?rNfeG1adPp(q;E>_h3 z`m%yfxNA6<|MyLi#{~L8qwB23$)28Qrke{5vu(lGt*L0e)RlZQ@&{I!n(UHk`)ppV>pG~8)4{=tUkg<<>Hr0E;&n*Odq19T~$FdI=`0pn_$@EMi_w| zMd5j~1ZU21joR%JdsJ+xu<3OO03uYw-Zq9n<)%l5vcVbnkH zu2dkV_X#5CDZ%7WCW##TL2qtUG&E>HkVoC?g)h~8&+fjFsE3xRE0koxnRMC5W&g93 zW(=GW?%}NJ*3UL?t9Iz~-Bzx2aoaE4Q&Lp^I4w7 zj26f7k)VKt6*doYgnFq8>r~~476c_$)LZtgHI`N9c=7)=iO)`P5`L49;`TW86-3;i zX0k#j2U^-CYHe;_*y^3G-SAabg3sG*K3pDt2Fuu%{VXl7Z(>AUxF^{ZTUn8qvlCN6 zI#M?eO}l8!`Zz`yTq4f3#XBqz_P%4ph?CoqB7NQ97T#U=JoEElBIolwi-Slxy|H9U zE|RuqH)yX1S?pKgN+5QED8nL`LtteUJ*x>e4Km6&V0_d@aPRWoTtZA>A$x88{!ine zAUSYknWr7%rOtS7{%8s}A>L+2Lm92A>hB4>ify^itOAPV6xls}swL~MG|rEGb#5(2 zPK;^h84g$ca6S%9yDJg8t2#Fm@>fv8z z@96VNG>Y{&XsgWc{j4NORcDt1_X_(FldWZUngb|mBPsQh-eR)_J 
zokM$N#hQhWTzA$fa3^b2lDB5Y>ryZtjJ|axeO|}yXj_uwkPm0?o1R>oh5R?~-MK2^ z{UbI|wqG3#mpsX#N10#_0f~lQ7xfRiH~V$z-^9|9<8LXzxLG)b30(xX)Wwl+{OBaI#b>@mEOB}ggIo!T zw%OAq**f2%?^2bRl4;W_d08N(dVbWhuH^NX?}Ydl$JuJDJ1=X2w4#WxdXkLg=hEPOD9Ls2?gSY{GNOjp?BFviXB1FmdVF6ILDBz0BCsVlSbaPPe3L z9^)XKuCau_X6O^@o{XSRI*j?$37h|eW zvs&+nCr`sF%%2YPv*Smjs2`_Yy@~rS+qpqqH~V1cI~=y!ogTR#bySi5RSioI#^+g( z5@Q5q)-jY?)6!_9_2X&VaqK7Wxt6%| z#iQP!ueisW^F5}Et?nPKZK@AQ*L`n3?Ei3Xd6;9rGa{?KC;2AbkT0%hqD@e}fvci} zd_O)^5@y`sc=0R$)6zf@TY`&?f0K|^zVcEZvm}Txs9$wh7ku}&k*Klx?P#{MbcgZn z&46t@hMr)##KcFt^;rClgp&vsj9@P8G^};pEW@w?f3lLdMK3x&+4f;conzGHT_Vsf z&rK;QxYaep&JwRrdk&4KEB3uoe)l3^Va? z{9M{YRbL+y^m2If*2@hoTvF}d5eT{Xb!X9LIlc_Px~uj@%lwQ7+thHC9tYvI?xspq zWW9Wuq^KvEM^|k6-3zc7YyU&VGp!XJ%3M@I-mAqxvwcU3E zM(AVRUq)bqfdmuh|66bI7IToDtri1g9lqTWPclZP2(vI#VWTlh3ui2@I6cb6<~#DR z!QI&L9BoVw-BT5{;euMKMmVDn!l_+*g^*mAcK~aG)!6>L`QZ0FDgXF4H2aRe*XCnm zw@;hump5c7hTAlZ*oCnguNEoN0*bjXEW>8Uz~*PoOLoq z3o&A(D&gSa@uuiW9Ja>!kE_UHX&#m}m%Ch7xoY!q651!q^^CYslD*R~aqjr(GBibC zJL**|c*nVIALgpZ<3;rvl||laDA~Jjl-9~}N6Ft;C?kPksLz7Us3=N+r-o2d#}5^9 zs9}-Q(67Rr^sbemdW=m;A|N2AVz{Am#I^8I$_VmEQpg6EE1KvL^JIVSdAc$=HssJ| z@c0YM1b}pt-FG^@mMKv>mGZ+BTTPf&ZQxGx(}GxGR~yx3d}^b3O7g+S&*T^j3cC{^;8Mt1p%umCCbbXwmi#4Yh-(Ev~Q};U8pQVNTJRLS!{}OGokLYL(+k-mRUoL3(mTz}cj^K1m z>vu024_1b5$aAq4FsAMVbkVI+zgQwKH|6=d0oA_!uWm;L0k%(^<#sMPS40AqyG+cp zB@G>=yCW-haq8q>JtV>$uD#`({mPTG97ONeTQ zj=BgOs}m4D_IY|qC@^vt-=wQm`5jEm=g(TScc9RC>5|?3>coTZcE94jnTPkGgzMG>_ZX8kA zWi8^=w}g-5W365aXIFj+YzumqsaJ^&VO)3Gt=JzWJW)ZAKWbrcK`Em)A({_bFeY0x z$DEI3o+*86kRl$ffF`fcptdryd%{LI&oRBV)lElVhFOoxDdoAOg2-z(E+D&4q7&gQ zS80JM#5(Z)R*4!%UviEiIlLA4bJ|#GKTpb)K`j$3Q>QX63tP_R^}`>a$5xqFoQtX) zqU0JQZe$MSvn?&eV)s_YMK(_eW2E^xvT}rxb#%5*DZB>9$8HCF@arEKH@3744k`Q@ z20bG8t4QajtEZg0UUVBMO7AD@cTE9BjWvG3;a9>agt101bO0w5<7;?$;H(Uc3t_`|5Txbt@yQDs6bp zku6d3qCUMtN%M9IB+Gf(quOgprJuvPLp`gu7UTtll$ih@!xxrK_LH5|0 z9P*WAZUa~Rm?9Sq9<#-|*SI#=>c&&?4l##Er36LI&)d9fsRLWro!lefIenAwoH{hx zG1zLyBi2JCaK((uA2;|1@{aG+wxA3ZHc6qss922tC`K^8G&i1-L@ClyY@@DVwek6d 
z)jgbX(*mI#EGg*Ac>l#)wGsR6-BnYKL_z9D$lOoX1~;v!g#x2>h+SIXg#B>lZLmHt8K7=mr7wEvq83^hwg z<)bH;iSxWG&*FE4*v{Hz>b5Iisix2g+-*5(n}<~L8b6WpP2M)`JK)P#SJ9u)MFo*Z z_|kRXJupIhD@j6ziVRb^N}tRur#IJqvDgdklyOMl<}ln9Z{u}b+Ed3=M-Le*CUi74 z^mDQ8D;my8?`$SGrpN4!8D)AMiuKk2PiRb^Lgj;;^a8xXdz(X4rISKHGoh;Me?mGH z&rj3b-^#D^yOR|Y6?sgAY#N(eAbHlj_iSmhU}H&_{*;wPi;n%#{aYgGPZC&!xd>4H zQ#Ktu&abiOEp$D<&*;`AUV#Bl(RimtT_nRnYWkE#gW;Cp<^ZN|fBVpix6F&LgtT>~ zlqfax)~aT27E-2fGTES8>7IzTDf5cNRx3=R94(t7hJ>q9=y&c4MCRBLmPB?yH}=uy zN9M4E=+Fk)&e`c-kx4-X7*}!D-c3z$SlehMr>F<#Xxm!PJ?65p4Wh-YOY#=kp8m*e zF?=WG)UQS^3MltHH7^Fo;D|FtXE@~|P_3T?1+ zK7~?%5cko>cPQkL$|!JNTAngl%6(%l!{>^ywnJrYTOTw^q3}_&mMH_IC+i?cP_}q5DK@zb+v4*$ORQBr2%rg%W2RE#jXN#!0bd z#SY)zieC&Vkws6mp*M+pqs$U*qb$$eU65cvQ$1a}jd%OTNG9fi2fo^3->_{tc)}@F zNyMIuROv5o_m1o0xEndnoDnMogsRWyNeIWkjEu7mL`NSV%@qZciFkNC;rFvr&G_U_ z*S~Te)@~QL(ma7jzY@2y@Kg}`?}?x>dP)#?jrBVLdYHpcMFmDe*J}-Hak_9t$=KbgZky1_%tM%O$wWez;3% zf8wd$ftX|JNjH?=G1u*O3p^8{q)w#gO&R93@PT3-I1X6-?%H+x?3)MY4+{~WY}R1Y zIHnuNs!tw5(~3mqG5Y7$y}*rxA%9SMDg-68_NIUG?_q9DVWEntC|Vv@r2|myNzcTh zFe!!DH;xh5I_skf&mX3A)@`83{fs7pkH+J(7z?ecdj6=pr}_!4RpLEL$KPb$Pc~1O zcXr-Wh~kGwL};>D-|mpofDUDZtV8(?&?ST0$6o&Aja5Q9RA@#bTlU3*+AH@7j)}%h zVg3aZ8DZ?RQyIc>@ijs(Bcr+53@8HW>v9cuo4$!?$R_kgG5B)Noou7Zo$3tzM+IC4c*8QxJEsO0c7Pxe^C)&1LF0JlQY%j!)_^nw6N)rXI9;5g-8rOA}6TK4> z0f+f_v=5ycszQ#6pxWOD;p9>_6=^E%XpPy$$D}&iAaz|c}e(P>JKU9_Gz zdf6#bCp`a=XK2K?kzhy7aP=$q5p5w|?7o+VmMLvGLc|qqx`9dM`IHy+521*MAW8Mz zY5Td|rWM&hIcR+&6_24CO8l>)9f-;!!97U5?`=@#e-7p8PEde7y}BACyjgVvfn+6( zfp{5GB^KKIwm?7r1*rc5&re!aHEj7Yz=QtOxK}P0<;I5wq}`0rX2!+n%k9c+^E@H?6?6q^GYEuFTfD_01psY3uj zLK?+sXjL2xg62YbxI*mOI%aI3h0vK1c*$M~8G@$Pomw$yZzm`TQPQqop`!AixWsPw zIb{5-s|L;OauzUEE@;0(4C}UUkZyJHYd<^sMmBcu1zNfi6*fQrqE;d|9UTfV)-toR zeE_6_#Jnw;wC^f`xZKnt^G5LY?yfI@qCjdH6&3aOf%E?sn~nfrGy6Yy4{bPXICdYp zJ`r+lrO?&yuoiqLEpDN55sM^l55L!soQ{xBBNMLRY!2B7+Ii1d@fe2LReb>Jn?_XgvcdJwX(9;_=Z!xKyJ{=497vKER06He}Y4x560 zG@cVfYcG(L5g6YG!0Q81`R`WaLTeM%6iF=B zS(+v!gAWw?py6=P?l(xX(;!8op*zMG04a|`-@!5jc}N6;vuXQWDJ1m+ 
zdWxXo<%(HiXY{@Yl8g&q4&7F$_zYPZ@87&QeKGduqGN*pU)WNpRf|yrW-11h}(*QR73cdzI*ZHCV>Ca-%}QF5mNf)$nnd zH63CKwA9NsYfb^rgkflCi0pELhFDA-9LstYFN?ep$xT@k@q72j?;L(czK-Gh#g&M! zy#KjZ#i{!AYkE4L%Y^;vAg^w~Na${sfvR?p=l-cQ93p^70p7bD489KhF5CF!7&Ncz zE~=yEeS);o2k8UyVNN}=8iND1Xa*bhLwaC~Kvy4hGWb95YFDYI5&W%q!bAQol3~w} zou};62sJvPNgQcg+qo90B*+^ZcOvuZ#$jOZ!-+8S1EmflWr`A{8ue0hf7Y$!N4s(_ zBbfF%%TcuC5LJ87?R&x{OZ>$=eO7Pp0r! z#>mxGyWz&8dFIjJA;j@c8CIGR{~Ag%?(2rbx?!|8I)`#!>_sO358W&WK9oOOSFDHJ z`Cg}MmY1O_MEX$@>3|?%POJmNY++!t8J1)jzhN>p>05Tgg=a5ooc z*!>m`{cTo^)}pOj206Txua~S>OF|(s$g|F;EqR4}vYgDMg=#1U*teRv==l!%BwT(E z{02=)k;~?u4d2z}d>q;8LTQQg4$sVnf~& z+Ye6K0%FM_{g0*hFw6&{+Hrm%b${tcpCh%vimrD%KJ8fCedYbNF?~Ju^W!2^62u-w z3$rWIG8_OwS*W-|Vbf#x!K{RwdP;J_l0a(iCAwb~Hk3MQoKoiP%6}8BEqH43b&z!B!F;nJ1?ph<kkjn0|YCB4&I|K}gy z5Z_t(92fVy{RI2zmIT~j{~!oqBIhc693C+pamh9XyBUvf0o!sBJ4o=6?btPzXVCS@84jlcE7u#tMwP&5oiILL~yO> zXfFi3v9d)ZCG9Amo<=r~v+#*ZF%t$d?PwRFPjTR{!}imNigX z3SYK&gDEd7FRur{3xq6xs+?_oPk|za0e$#yfouZT{{{^2BB9lx0$E_#0~IbMHTCs@+{DD%C5Q5%1AN1>6z!`c00H$;yWcMquC6LU7I3as1AYT4Nk}{G6Qw3gPk<(2vMTfEV^kEW=+V@b`#UWR z#-j!<%S=du49MB07lMEWk{*J@TO*!=_d^nX_P;7dbD<|BBs{KkU`IR!>HrBFltqa1Fp&{)v_PLn-OIJluHZ<5l*m%i8)@Bj8wLB7NZvGM{qiW= za9hFB>Fda!rC*U2(nqBH)BKL8`(H(8letm!%*+@z<&+S<;tbw9aQdh3o%eBuD9AZ) zUEB2rUdoOkX^dm?-(^junh}5HIU^=ui%Oa-kt_0tWo)L!>R=v{8bXBiz}Ft&C0Fb4 zx3DP%x^p{!SdJ?GC56y^YN!3w9%GMGl>y#}X2uGN&bOy1$i*==_4IRON{oX)~83lc3C zN3(eMfpd>U-oZ9y8@iTfK_P@^fE+e>{}l#gAX?D&767KV7h+%21BeGWdCfGQXWrf2 zs;Bz*k*h}yqkDPR%MR`LmFMm+)9QXkHNCt}dW9q3vX2@$eOya>w*T0N%yS5N{@(Kc zWwQcN(*I*0baHvHasPhyj>qLud<1sbx0iJIN~n^U3gIGmu(afheNrAsG{*eU{hmZ? 
zSfXl%zOF(<%*OW=-6zSSjlS4N9{uMnnY)4bw09(aNMjOWN=p$YOwzP`zIV;&)~5zV zr`mkBq4~JoxW)U2aohrlg0U}IK(YXVcF@q zHslscTn{(cIHKa>a*vLjhAsF&w!swk>$tnF`zuT|G>Ph-j$>~EsyoWw%*uDX{_`z) zlp7UBVB+_s8D3qy`lCm@AR3SSNch26Ilo`R8<-75C6ke2)0O_L zPoPh**TN8*2~4h)p@Kw4)9sliA;*n4_h{HmWCGEr0;KV&PRs_fBarRqBZjBP$C?uI zaJf2H%^cJ@=-_v75&-~?UqFC4;EyYSM9{9c0Bp~J)KJlEa76MSV|YwcfO@z*A7%K1 zlos~2j*pGu?al^hR2FLEM zC$@IC8blppK!08IcfY@%Wquka@Gd^x%WF z+qK_-L36H`?~n@;s6#xtlhn#Ef{W&oSi5XDnAbnvcgaMzWs;>!?39ca>Aoalu%m&NJ8htS3tOJ)Y-~F|o zp1d&U{Z$zWF7gnP^a19z037r3rsriPARzMss9iF#ZXoA-B2AZ=7WV5AcGLCVQ89wH z=|G)jd%bI)mMI z9H3XQJAN?Q7~?dyu6wqg9FUV*kVZL5R;PTfbg4;>w!giyJEbM=8NVJ zJz#^2fu|ZwBW8g7uSj){00&SHYwPTEI0EczVVB{zr{0jz&iel17psVfKL5DgC#%|X z2deu{-`aNm+U@3l7E^ZH_UtcWYU=#{evNTRH^2O`4Auq^ZTZ>;&P}40b1mT`RnA3& z1%{lGISa<@?U#EWtdEusf#F>5o0***fEPyT%iZb1**l`zx$))>f{H<# z1Ckfs7a)^o6W1Em9PALsx51R^l#s~r{(+^!S|%4 zrDbJiwur977T|qIh!z+E&{N9U+p{~ZvR&*n1(Uzhmk}x@1|YSb=3t6;{X@usX{zg` ze4HW7u=)G4WRH;0Hj@S4q<$j#i(){Ol$iF(d?X?x%LFWk2@q~7PB$vo*C8>N1sT2^ z!go{IBlW#Gt=g_c@aae8VCLaeSrMjhpf^o{n3B>$j9x86nF^-+VJ~3y>>#OoS|rNF z%^eXLna-hKu>=SmaN-|-yq?Slr#*a11x!u=!}sUvaL0?f>jJTO5W?b!xU<^VW#G1Y z0p6Gi5X5XipSzr{m)b)Nk3%KY|I-n!kJz#p$VQYqT_1y(Lr^jC-xz7!t_ARrJ>~_~ z=>S1tqooxu9kU4n$RBbXU1O`N?wkmV*?Uk@g^6@|UawLR$R+xe919dLoKjl}?WBmSW2 z(2VXN<6OIv5bicnQ1OZ}@vt(6{J3V}W`4{zqNb(>E1Sx$BtVV*_{iW$wqC5eR`;So zL3C4locm+fspjE_xm}MT$>tUR|9JxgF`)&aKV#nxds1atwJe@zyUP6Q1vYhC9*%Plapsno^Ln`y_tC{MWNEhESPvtP*4Yw=n(P%54;aDISM7)pFhBRp;_y3-k+() zaC~&M1kq;I3>P4PR=SgYeSCcSzdpQC4fLF}5hr=kvqppgax;93mbOkYmaoIe$mk0J zgUlD*60>L|Jo@(m9Jwrj+}~XYnYj2ToKnf}J(tengUvCIhlj^P6R9DOu;T^~XU$Q{ z_TeEv1Txt(JZXZluYtw905LfqY?E(sbN4~4)r^LQ_NAcaq-~=4(+>HpTQl;{08AgU zn5-E$digRM4mpS5t?xTqHM}e=-TwNabD@%jle3G*w6_t?cYYOHjt)Lc;9q!a%?JuuGny82#MG z*S7=AV@Hkq>2F{rDS~e_(tWX zsO_j_si2~Qq9PCMLbCfG>#Ie(k(O8t_kN%eo46Q3j$*qIG&xI zeSu3U@HBCu~&T7{e5_VkyMOVnHC`n5CoR?%QR7SqRBB??`pNn6%lkCGBK66PFbw|d{_^E6hNilD3IqWG%aD|5 zYi)f~J_)fw>ESY#%&E|sj{JiptCu*xoKz&E0q*XKx@KKJ3~ 
zWC2(x^V5TM-9n>x7vRxMV8Pwwv-pC1?%cInZrHEret+)Qu&A=L;RboKKDYo6)JdRU zy_yYd-Rvi&`4eutO8@fMP+g$bm!UEQ8$zQGJqsPN5g$K7I6^(5L*WGp+|L zACJqP$+&d`PQ~TLDcis+&8(%z5P|VHZ9PQ>z5k=Q>kMlu-PSN3#49Stqllo8nGqSK z3Ib6|augAfCenmZRBotJ6hlNH(V0;WNO7o!7LgJN5)cfL8j%5{h8`&i14s=?kQS1V zD*@8_YWU^>+xcQd(V}fAs7>)Js5~1z1pm8I8CKXgh;R(AG*12m2cf z+92ZBpkC_>)Vruxi)Z=8-N+;mssLz8Ws;??U+^P<@~>`~pDwE91Numg-ta#Nfq8+5 zvd-htH)!xGQ0nyrhX9{5HMo851)$L)m?)j%3Fmbi5;7?%$pvWd?Dlw#_Rh|p<3(-p zini-{Ntxr%JC2tW7caHSLtJj(e%)ps-Vc>ZLtLgeShVpT^X-N28Stf@OYNMlUrz)h zaXQ)o)U93A!E!0=l(LEwqf+($jN5P{LfZ^}IEr!&}yi2?T9J z!=eAYIw+ktrl?yGK@aaQ1f;)zH=XVdkSopn7e8d(o#0^NHiuCG-w+a^Qb!5%dB^`ga#cuYPd?*qt3ZcaTrieW4d6uf?9Nln>~A zb;r=quop-T_LZLGRkWXq#pCzUYpwEXSvHQ2(BVV|wok(fCG~A!3xG*gEEb!dm$w>`+Y1P3YOgf*Ncc2t4oFvUV1vqZ z94-h@1iNe3{-pxMw`M76Kth^bJkYJ{3$b8wGN=r zpqEC2f+6w$_(ayfGeu3)*m%?^WUSYGcdbntkp@TO9)e0pDG;WR+sl1?(0V-x>_^=P zve;f}%mdk*J-Eys7;W+WepAgclxTpI_CVX_&%V&_1+m8-Xejn>Zdp>x!1E{)Dk4o< z;sC=znfBjVj(hS%X84VjLcG4UTcoD1G-&qn1}qgXZhnt* zdUGJ=*#nu$4p1k1FkC@&Qa;WQnFyxy{Us3gOdC^uh;M)R^EkJU2U;HH0x-$}iMn%@ zkHv72wc-X?Xpy-xEDf={CSV(G!ZL^z@5b+GYH4*Rfpxk^j~>17{yRnK>FGIdlH1oL z+MWb4wvP{bwcqosyB7rFL_$)Mk)rw9lZ#-HnUt~xBt@#62lFKv$_xajL8-?8%|K<3 zlSACgVHT(7+9%Q3JV{+m-3-Xe>>8ukcEDE?5)ur6>ZChbMfln#2kY2dmg^9=M zPo%%q2ju?Ejc@#4535FhnsMBp+Tbp)sMa`PdH)4lMSk<6<&lvl*R-EWjgB);yx_JD zHN7n*byU@-A`WU}U&V9Ez0L{)hw$idOh2OY;73)ldknC_A^vyx^1#eocId{Z4;mJN zHML{OWwT;q(J4SN+)`n&@c@uf;3;#?m=Xe zx0x>P%m%e))Rc_5yxhKsgMt4zQ9dy6(s0qrZl6V;7yWyJtixI=smSHIdT4HWoJ}H8>Is5B

zsneC0c#H=iIf{C5%p6UQ|H~w1TkQ`G#!xLA`66|t%rw**y)nk?`2LdgG0D8lM@!Mp z+(nx--7{-isehAsLTo?$cbO<O`Cu_g5SVazMvqn*yx5+_j zG3~rT9Lh149oH-n$ZyjflXC9TrgaNTU|tuPFdB}xaI@e4EU++U12+7rJ%k*>`w8f< z%ME4cwi-`eYJ)V|wzig3+4Acx<5O9Xa6Pw`hS2UVzK^V|IC$X4E0wcFqmq73gOJ48 zuA2|-w54=fYq#83NAzU8+dz!IRVnt}Zg8TB=Vh&ar>AT5A*wykGd&8MV^WF!eX4>j zw)ZB5YX17=-#eXuKcsG2n&KTVeRk-$krlH3VYAZAYQStTdoGhfWf*XXUrLqh{it%c z4}wg)#Lhu{1B*Xredi8p#K>f5=JGZ*CB~XHhAlEh1)u%7iKKdUWcZI$^`7GUIWUiU z56Nz4glK`2dwmO+jlOD$;iV0=OzGM*=xNyv4jwj*)o_PY?p&U18Lyf=Vs=*NJ;@ZE zI82}1uBfiQ2CWMve*7pAIlGQojl(H$Z3I>2f{%4d!HwImsh)p&6PyVO@9#T5Hwo(= z$RYc(`sneQ9=~53GUH;1a0#;EQyYsIlzxrPK)Y0o-5XW*f4Qi*9@9p&))jD_u#V1rWr#3cV8~vt~>tk{oq~GJGJ+CK%r{@ihj6HgSquh3VHW6>Ee*2TE z;<;o$dE7X+(g=`#XvcA|L-W1F#KdHS4jm!yYVE1HCv6X;XL{fc=MDPeUuo*98}a#a zB?2*<>uYi+S^kv+<2kMmVW_sip2UtcU7T+->=d(GxujRzArtA9RKXVB{??rA0%>}; zFBZL89LEf(XfYh@Q$HuYJi`N$oeLK{mp{p@-QE2mlJ@bN=G$*F8=w8syVV?#V-~Sr zbCsBG07bm|);rSGwe+Msvk#xF);?STUA-B3?Pw(0v z;W@kKQ-qfn3fjP*wD4|cQY|cN<$K?pW~~mVp6oKtZ&ZXzEtnRIk;6t}iYAI-BHUF| zk+<7$v-xhrG_B>q(#GU((>QQLJE2}`rkVH!OH{w!vAh!SX(g_yMuhB^C9 z)lUCVeF#JX`jcO-+P{M<&kj+=d%9b0Y*G|=uPed^CdJxCR8WV(Hssn@_@TK(3T(t#4IHN)dA6u?!{DYB%@or_H1=qH#`R)%@@fYu~u7)TP zbV9HN8!bEh8f&jozE<6^9ZhX2$uD@k{^{(GlDrz1j#q}lujCWP1K2F={qadNF0DID z<6vAdKA2u}u{R{j(oYoixhk*w+*dII{62mYabj7hb)o#?^kq?;ut)EQyLW%?zI9WIoUi?%D7nqu0_d>pTi%w(I{= zc(5C@_D18`vr(t9rzi6-I&WtWudn;Y!mOhSj`e;F&VpUmFkiXO8JtC`eXhSpRSrMl zm1R_$T;+7NckMKy93WK&@R`TVT?J8`S6UAA1u^1zPMvB=skb@tuC^bu zWTQgh0(FNU$#fbQ^^S@r^QY?V8Dyz%%I$lyAF++6xw^Q#Y2>buzR|>WBfF=pBYV6! 
zvnUyYV}9k(H=CQ4p2{s=^92{Y!*b_39lejcvumjTik@p=%*-ke11To>^It=Rp6b7-ddF;NjgVK0f3*i&%!B)-F(s#-vgYo?6 zvR-~b|L*Ie@n7`bhYo!vXL`+}+(*5Ub2+{jwBCjAE`dC{F^f%DG&?Z(d<1&*h+5%l z9(QYeg-E^7q+=Z7vx5>{x#-KR+EftG>IJx$O_7jnVwfb-)peC3Lp&xOnZ}(A&Rl(T z$pU*k<_bP5l7I(6)&}@?p4*K6qf7A0o^fZ*uw_({)F(oR(47uhIdE2U=C*tWV_>A& zxBsmtrGnd+xhtsCODlf%g1#(F=ZCQo4G|N4?I$2#W8DR!cb7$vTLIN9hhWSJ-i@}{ zr}*={^8`WU*7={J7h!s?4Dlk7$PAbL0Fj!+8Y+LL5w!5uakDQ)N4Oa$OkJ6=_I1Af z&+!0Moyf{rB&h2bysN&(gYUcmzK~dqAQK7j+ zKoTtDn+xaa39B1!1WYJ)WoNEU&?u5cxm8Y%@)oX#-J5hlZ?T8~DOgOQ?*O1`Cl|6a zG6DewS2f)|sN}QoD~r(CRH>L9dZShG*`Z7RA{GK0efcd4F__&gQ4!hxAG|a6T&*|Ryw4*@|ausL01M}=fi~AUn(~m-TV&f;! z(RTOyiZbw$-$YA2&D44i#fyBM#Kh`g2B`(A^i5EadX_ulQ9@#rBEn;?Iwe~w9rkgQ zgCAY_`1GV`ZMcdFjC%J#_@(Kzb1l>REEWqlP6#cVAm1}Bi0Kv|crMGEDpqG8+=KHn zzx7HEj$qwOLhM)Q!3@q}#=!eOTFkNt+!nL)gd^&M=lx1uKU8(cYBt@6RP}zSNHhBn zhTga%7GtD-W_)KhRusV31o+@|E705G+)}n+!+8(c6ctk61IkheTjdmbV9CEHRci~jCF5La~(n@!?r+sMqiXv#LnjOeE8w>i<1N_6BNa!~p)lW+Hk1@p-i;wz@! z0T@e3eYbMINt$nxqP?VbE}4&pM}L$a>sZSE{~T^eWT{iwa&OAnD5)JbRt{H4mv8_2 EKk9Go;Q#;t diff --git a/docs/visualisation.rst b/docs/visualisation.rst index d18e1648..efe2644a 100644 --- a/docs/visualisation.rst +++ b/docs/visualisation.rst @@ -12,7 +12,6 @@ visualisation tools to help user to judge the estimate. plot_mbar_overlap_matrix plot_ti_dhdl plot_dF_state - plot_convergence .. _plot_overlap_matrix: @@ -132,54 +131,6 @@ Will give a plot looks like this A bar plot of the free energy differences evaluated between pairs of adjacent states via several methods, with corresponding error estimates for each method. -.. _plot_convergence: - -Forward and Backward Convergence --------------------------------- -One way of determining the simulation end point is to plot the forward and -backward convergence of the estimate using -:func:`~alchemlyb.visualisation.plot_convergence`. - -Note that this is just a plotting function to plot [Klimovich2015]_ style -convergence plot. 
The user need to provide the forward and backward data list -and the corresponding error. :: - - >>> import pandas as pd - >>> from alchemtest.gmx import load_benzene - >>> from alchemlyb.parsing.gmx import extract_u_nk - >>> from alchemlyb.estimators import MBAR - - >>> bz = load_benzene().data - >>> data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] - >>> forward = [] - >>> forward_error = [] - >>> backward = [] - >>> backward_error = [] - >>> num_points = 10 - >>> for i in range(1, num_points+1): - >>> # Do the forward - >>> slice = int(len(data)/num_points*i) - >>> u_nk_coul = pd.concat([data[:slice] for data in data_list]) - >>> estimate = MBAR().fit(u_nk_coul) - >>> forward.append(estimate.delta_f_.iloc[0,-1]) - >>> forward_error.append(estimate.d_delta_f_.iloc[0,-1]) - >>> # Do the backward - >>> u_nk_coul = pd.concat([data[-slice:] for data in data_list]) - >>> estimate = MBAR().fit(u_nk_coul) - >>> backward.append(estimate.delta_f_.iloc[0,-1]) - >>> backward_error.append(estimate.d_delta_f_.iloc[0,-1]) - - >>> from alchemlyb.visualisation import plot_convergence - >>> ax = plot_convergence(forward, forward_error, backward, backward_error) - >>> ax.figure.savefig('dF_t.pdf') - -Will give a plot looks like this - -.. figure:: images/dF_t.png - - A convergence plot of showing that the forward and backward has converged - fully. - .. [Klimovich2015] Klimovich, P.V., Shirts, M.R. & Mobley, D.L. Guidelines for the analysis of free energy calculations. J Comput Aided Mol Des 29, 397–411 (2015). https://doi.org/10.1007/s10822-015-9840-9 diff --git a/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst b/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst deleted file mode 100644 index fa72d19c..00000000 --- a/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst +++ /dev/null @@ -1,19 +0,0 @@ -.. 
_visualisation_plot_convergence: - -Plot the Forward and Backward Convergence -========================================= - -The function :func:`~alchemlyb.visualisation.plot_convergence` allows -the user to visualise the convergence by plotting the free energy change -computed using the equilibrated snapshots between the proper target time frames -in both forward (data points are stored in `forward` and `forward_error`) and -reverse (data points are stored in `backward` and `backward_error`) directions. -The unit in the y axis could be labelled to other units by setting *units*, -which by default is kBT. The user can pass :class:`matplotlib.axes.Axes` into -the function to have the convergence drawn on a specific axes. - -Please check :ref:`How to plot convergence ` for usage. - -API Reference -------------- -.. autofunction:: alchemlyb.visualisation.plot_convergence \ No newline at end of file diff --git a/docs/visualisation/alchemlyb.visualisation.plot_dF_state.rst b/docs/visualisation/alchemlyb.visualisation.plot_dF_state.rst index 14bc2049..e4814efc 100644 --- a/docs/visualisation/alchemlyb.visualisation.plot_dF_state.rst +++ b/docs/visualisation/alchemlyb.visualisation.plot_dF_state.rst @@ -25,7 +25,7 @@ The figure could be plotted in *portrait* or *landscape* mode by setting the The user could pass a list of strings to `labels` to name the :class:`~alchemlyb.estimators` or a list of strings to `colors` to color the estimators differently. The unit in the y axis could be labelled to other -units by setting `units`, which by default is kBT. +units by setting `units`, which by default is :math:`kBT`. Please check :ref:`How to plot dF states ` for a complete example. 
diff --git a/docs/visualisation/alchemlyb.visualisation.plot_ti_dhdl.rst b/docs/visualisation/alchemlyb.visualisation.plot_ti_dhdl.rst index 3b1a3faa..af984f34 100644 --- a/docs/visualisation/alchemlyb.visualisation.plot_ti_dhdl.rst +++ b/docs/visualisation/alchemlyb.visualisation.plot_ti_dhdl.rst @@ -11,7 +11,7 @@ When custom labels are desirable, the user could pass a list of strings to the *labels* for labelling each alchemical transformation differently. The color of each alchemical transformation could also be set by passing a list of color string to the *colors*. The unit in the y axis could be labelled to other units -by setting *units*, which by default is kBT. The user can pass +by setting *units*, which by default is :math:`kBT`. The user can pass :class:`matplotlib.axes.Axes` into the function to have the dhdl drawn on a specific axes. diff --git a/src/alchemlyb/constants.py b/src/alchemlyb/constants.py index 830b49f7..459e5ea8 100644 --- a/src/alchemlyb/constants.py +++ b/src/alchemlyb/constants.py @@ -1,5 +1,7 @@ '''Physical and mathematical constants and units.''' # Taken from scipy.constants since py2 doesn't support it -k = 1.380649e-23 -N_A = 6.02214076e+23 +#: Boltzmann's constant :math:`k_B` in kJ/(mol K); value from `NIST CODATA: k`_. +Boltzmann_constant = 1.380649e-23 +#: Avogadro constant :math:`N_A` in 1/mol; value from `NIST CODATA: k`_. +Avogadro_constant = 6.02214076e+23 kJ2kcal = 0.239006 \ No newline at end of file diff --git a/src/alchemlyb/estimators/ti_.py b/src/alchemlyb/estimators/ti_.py index 4335fe7b..383341c9 100644 --- a/src/alchemlyb/estimators/ti_.py +++ b/src/alchemlyb/estimators/ti_.py @@ -105,46 +105,3 @@ def fit(self, dHdl): self.states_ = means.index.values.tolist() return self - - def separate_dhdl(self): - """ - For transitions with multiple lambda, this function will separate the - dhdl with multiple columns into a list of Dataframe with a single column - (single lambda). 
- - Returns - ---------- - dHdl_list : list - A list of Series such that dHdl_list[k][n] is the potential - energy gradient with respect to lambda for each configuration n and - lambda k. - - """ - if len(self.dhdl.index.names) == 1: - # If only one column is present convert to series - assert len(self.dhdl.columns) == 1 - name = self.dhdl.columns[0] - return [self.dhdl[name], ] - else: - dhdl_list = [] - # get the lambda names - l_types = self.dhdl.index.names - # obtain bool of changed lambdas between each state - lambdas = self.dhdl.reset_index()[l_types] - diff = lambdas.diff().to_numpy(dtype='bool') - # diff will give the first row as NaN so need to fix that - diff[0, :] = diff[1, :] - # Make sure that the start point is set to true as well - diff[:-1, :] = diff[:-1, :] | diff[1:, :] - for i in range(len(l_types)): - if any(diff[:,i]) == False: - # Skip if not pertubed - pass - else: - new = self.dhdl.iloc[diff[:,i], i] - # drop all other index - for l in l_types: - if l != l_types[i]: - new = new.reset_index(l, drop=True) - dhdl_list.append(new) - return dhdl_list diff --git a/src/alchemlyb/preprocessing/subsampling.py b/src/alchemlyb/preprocessing/subsampling.py index e7fe88b8..199ef880 100644 --- a/src/alchemlyb/preprocessing/subsampling.py +++ b/src/alchemlyb/preprocessing/subsampling.py @@ -14,6 +14,7 @@ def _check_multiple_times(df): else: return df.sort_index(0).reset_index('time').duplicated('time').any() + def _check_sorted(df): return df.reset_index(0)['time'].is_monotonic_increasing diff --git a/src/alchemlyb/tests/test_ti_estimators.py b/src/alchemlyb/tests/test_ti_estimators.py index ae25aaf5..208e27a9 100644 --- a/src/alchemlyb/tests/test_ti_estimators.py +++ b/src/alchemlyb/tests/test_ti_estimators.py @@ -136,8 +136,3 @@ class TestTI(TIestimatorMixin): def X_delta_f(self, request): get_dHdl, E, dE = request.param return get_dHdl(), E, dE - -def test_TI_separate_dhdl(): - dHdl = gomc_benzene_dHdl() - estimator = TI().fit(dHdl) - assert 
all([isinstance(dhdl, pd.Series) for dhdl in estimator.separate_dhdl()]) \ No newline at end of file diff --git a/src/alchemlyb/visualisation/__init__.py b/src/alchemlyb/visualisation/__init__.py index d58b367e..b7cf63cc 100644 --- a/src/alchemlyb/visualisation/__init__.py +++ b/src/alchemlyb/visualisation/__init__.py @@ -1,4 +1,3 @@ from .mbar_matrix import plot_mbar_overlap_matrix from .ti_dhdl import plot_ti_dhdl -from .dF_state import plot_dF_state -from .convergence import plot_convergence \ No newline at end of file +from .dF_state import plot_dF_state \ No newline at end of file diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py deleted file mode 100644 index 57e74868..00000000 --- a/src/alchemlyb/visualisation/convergence.py +++ /dev/null @@ -1,72 +0,0 @@ -import matplotlib.pyplot as plt -from matplotlib.font_manager import FontProperties as FP -import numpy as np - -def plot_convergence(forward, forward_error, backward, backward_error, - units='kBT', ax=None): - """Plot the forward and backward convergence. - - Parameters - ---------- - forward : List - A list of free energy estimate from the first X% of data. - forward_error : List - A list of error from the first X% of data. - backward : List - A list of free energy estimate from the last X% of data. - backward_error : List - A list of error from the last X% of data. - units : str - The label for the unit of the estimate. Default: 'kBT' - ax : matplotlib.axes.Axes - Matplotlib axes object where the plot will be drawn on. If ax=None, - a new axes will be generated. - - Returns - ------- - matplotlib.axes.Axes - An axes with the forward and backward convergence drawn. 
- - Note - ---- - The code is taken and modified from - : `Alchemical Analysis `_ - """ - if ax is None: # pragma: no cover - fig, ax = plt.subplots(figsize=(8, 6)) - - plt.setp(ax.spines['bottom'], color='#D2B9D3', lw=3, zorder=-2) - plt.setp(ax.spines['left'], color='#D2B9D3', lw=3, zorder=-2) - for dire in ['top', 'right']: - ax.spines[dire].set_color('none') - ax.xaxis.set_ticks_position('bottom') - ax.yaxis.set_ticks_position('left') - - f_ts = np.linspace(0, 1, len(forward) + 1)[1:] - r_ts = np.linspace(0, 1, len(backward) + 1)[1:] - - line0 = ax.fill_between([0, 1], backward[-1] - backward_error[-1], - backward[-1] + backward_error[-1], color='#D2B9D3', - zorder=1) - line1 = ax.errorbar(f_ts, forward, yerr=forward_error, color='#736AFF', - lw=3, zorder=2, marker='o', - mfc='w', mew=2.5, mec='#736AFF', ms=12,) - line2 = ax.errorbar(r_ts, backward, yerr=backward_error, color='#C11B17', - lw=3, zorder=3, marker='o', - mfc='w', mew=2.5, mec='#C11B17', ms=12, ) - - plt.xticks(r_ts[::2], fontsize=10) - plt.yticks(fontsize=10) - - leg = plt.legend((line1[0], line2[0]), (r'$Forward$', r'$Reverse$'), loc=9, - prop=FP(size=18), frameon=False) - plt.xlabel(r'$\mathrm{Fraction\/of\/the\/simulation\/time}$', fontsize=16, - color='#151B54') - plt.ylabel(r'$\mathrm{\Delta G\/%s}$' % units, fontsize=16, - color='#151B54') - plt.xticks(f_ts, ['%.2f' % i for i in f_ts]) - plt.tick_params(axis='x', color='#D2B9D3') - plt.tick_params(axis='y', color='#D2B9D3') - return ax - - diff --git a/src/alchemlyb/visualisation/dF_state.py b/src/alchemlyb/visualisation/dF_state.py index 27662579..710e6f4e 100644 --- a/src/alchemlyb/visualisation/dF_state.py +++ b/src/alchemlyb/visualisation/dF_state.py @@ -30,9 +30,10 @@ def plot_dF_state(estimators, labels=None, colors=None, units='kBT', colors : List list of colors for plotting different estimators. units : str - The unit of the estimate. Default: 'kcal/mol' + The unit of the estimate. 
Default: :math:`kBT` scaling_factor : float - The scaling factor to change the energy from kBT to the desired unit. + The scaling factor to change the energy from :math:`kBT` to the + desired unit. orientation : string The orientation of the figure. Can be `portrait` or `landscape` nb : int diff --git a/src/alchemlyb/visualisation/ti_dhdl.py b/src/alchemlyb/visualisation/ti_dhdl.py index d56b9b71..ea497909 100644 --- a/src/alchemlyb/visualisation/ti_dhdl.py +++ b/src/alchemlyb/visualisation/ti_dhdl.py @@ -29,9 +29,10 @@ def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kBT', list of colors for plotting all the alchemical transformations. Default: ['r', 'g', '#7F38EC', '#9F000F', 'b', 'y'] units : str - The label for the unit of the estimate. Default: 'kBT' + The label for the unit of the estimate. Default: :math:`kBT` scaling_factor : float - The scaling factor to change the energy from kBT to the desired unit. + The scaling factor to change the energy from :math:`kBT` to the + desired unit. ax : matplotlib.axes.Axes Matplotlib axes object where the plot will be drawn on. If ax=None, a new axes will be generated. 
diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 427a5e30..4f0f03fe 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -9,7 +9,7 @@ from ..estimators import MBAR, BAR, TI from ..visualisation import (plot_mbar_overlap_matrix, plot_ti_dhdl, plot_dF_state, plot_convergence) -from ..constants import k, N_A, kJ2kcal +from ..constants import Boltzmann_constant, Avogadro_constant, kJ2kcal class ABFE(): @@ -197,9 +197,11 @@ def update_units(self, units): if units == 'kBT': self.scaling_factor = 1 elif units == 'kJ/mol': - self.scaling_factor = k * self.T * N_A / 1000 + self.scaling_factor = Boltzmann_constant * self.T * \ + Avogadro_constant / 1000 elif units == 'kcal/mol': - self.scaling_factor = k * self.T * N_A / 1000 * kJ2kcal + self.scaling_factor = Boltzmann_constant * self.T * \ + Avogadro_constant / 1000 * kJ2kcal else: raise NameError('{} is not a valid unit.'.format(units)) self.units = units From eff79810e8858c748bfa5b5b5ef1ecdbc230cbdf Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 29 Mar 2021 22:18:42 +0100 Subject: [PATCH 019/123] clean up --- docs/visualisation.rst | 1 + src/alchemlyb/__init__.py | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/visualisation.rst b/docs/visualisation.rst index efe2644a..549631c0 100644 --- a/docs/visualisation.rst +++ b/docs/visualisation.rst @@ -131,6 +131,7 @@ Will give a plot looks like this A bar plot of the free energy differences evaluated between pairs of adjacent states via several methods, with corresponding error estimates for each method. + .. [Klimovich2015] Klimovich, P.V., Shirts, M.R. & Mobley, D.L. Guidelines for the analysis of free energy calculations. J Comput Aided Mol Des 29, 397–411 (2015). 
https://doi.org/10.1007/s10822-015-9840-9 diff --git a/src/alchemlyb/__init__.py b/src/alchemlyb/__init__.py index e44e9d15..74f4e668 100644 --- a/src/alchemlyb/__init__.py +++ b/src/alchemlyb/__init__.py @@ -1,3 +1,4 @@ + from ._version import get_versions __version__ = get_versions()['version'] del get_versions From ea6b8341f16cdce1de0cbd69adedd400624a7680 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Thu, 15 Apr 2021 09:37:21 +0100 Subject: [PATCH 020/123] bump coverage --- src/alchemlyb/tests/test_workflow.py | 17 ++++++++- src/alchemlyb/visualisation/dF_state.py | 8 ++--- src/alchemlyb/visualisation/ti_dhdl.py | 8 ++--- src/alchemlyb/workflows/abfe.py | 48 ++++++++++++------------- 4 files changed, 46 insertions(+), 35 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow.py b/src/alchemlyb/tests/test_workflow.py index 3ca4c687..e0366839 100644 --- a/src/alchemlyb/tests/test_workflow.py +++ b/src/alchemlyb/tests/test_workflow.py @@ -141,6 +141,21 @@ def test_convergence(self, workflow): os.remove('dF_t.pdf') assert len(workflow.convergence) == 10 + def test_convergence_TI(self, workflow): + '''test if the dF_state.pdf has been plotted.''' + workflow.check_convergence(10, estimator='ti', dF_t='dF_t.pdf') + assert os.path.isfile('dF_t.pdf') + os.remove('dF_t.pdf') + assert len(workflow.convergence) == 10 + + def test_dhdl_TI_noTI(self, workflow): + '''Test to plot the dhdl_TI when ti estimator is not there''' + full_estimator = workflow.estimator + workflow.estimator.pop('ti') + workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') + assert os.path.isfile('dhdl_TI.pdf') == False + workflow.estimator = full_estimator + class Test_automatic_benzene(): '''Test the full automatic workflow for load_benzene from alchemtest.gmx for single stage transformation.''' @@ -277,7 +292,7 @@ def workflow(): return workflow def test_change_unit(self, workflow): - workflow.update_units('kBT') + workflow.update_units('kT') assert workflow.scaling_factor == 1 
workflow.update_units('kcal/mol') assert np.isclose(workflow.scaling_factor, 0.6, atol=0.1) diff --git a/src/alchemlyb/visualisation/dF_state.py b/src/alchemlyb/visualisation/dF_state.py index 710e6f4e..89f96357 100644 --- a/src/alchemlyb/visualisation/dF_state.py +++ b/src/alchemlyb/visualisation/dF_state.py @@ -14,7 +14,7 @@ from ..estimators import TI, BAR, MBAR -def plot_dF_state(estimators, labels=None, colors=None, units='kBT', +def plot_dF_state(estimators, labels=None, colors=None, units='kT', scaling_factor=1, orientation='portrait', nb=10): '''Plot the dhdl of TI. @@ -30,9 +30,9 @@ def plot_dF_state(estimators, labels=None, colors=None, units='kBT', colors : List list of colors for plotting different estimators. units : str - The unit of the estimate. Default: :math:`kBT` + The unit of the estimate. Default: `kT` scaling_factor : float - The scaling factor to change the energy from :math:`kBT` to the + The scaling factor to change the energy from :math:`kT` to the desired unit. orientation : string The orientation of the figure. Can be `portrait` or `landscape` @@ -50,7 +50,7 @@ def plot_dF_state(estimators, labels=None, colors=None, units='kBT', : `Alchemical Analysis `_ The units variable is for labelling only. Changing it doesn't change the - unit of the underlying variable, which is in the unit of kBT. The + unit of the underlying variable, which is in the unit of kT. The scaling_factor is used to change the number to the desired unit. ''' diff --git a/src/alchemlyb/visualisation/ti_dhdl.py b/src/alchemlyb/visualisation/ti_dhdl.py index ea497909..b948d0af 100644 --- a/src/alchemlyb/visualisation/ti_dhdl.py +++ b/src/alchemlyb/visualisation/ti_dhdl.py @@ -14,7 +14,7 @@ from matplotlib.font_manager import FontProperties as FP import numpy as np -def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kBT', +def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kT', scaling_factor=1, ax=None): '''Plot the dhdl of TI. 
@@ -29,9 +29,9 @@ def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kBT', list of colors for plotting all the alchemical transformations. Default: ['r', 'g', '#7F38EC', '#9F000F', 'b', 'y'] units : str - The label for the unit of the estimate. Default: :math:`kBT` + The label for the unit of the estimate. Default: `kT` scaling_factor : float - The scaling factor to change the energy from :math:`kBT` to the + The scaling factor to change the energy from :math:`kT` to the desired unit. ax : matplotlib.axes.Axes Matplotlib axes object where the plot will be drawn on. If ax=None, @@ -48,7 +48,7 @@ def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kBT', : `Alchemical Analysis `_ The units variable is for labelling only. Changing it doesn't change the - unit of the underlying variable, which is in the unit of kBT. The + unit of the underlying variable, which is in the unit of kT. The scaling_factor is used to change the number to the desired unit. ''' diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 4f0f03fe..c4fccab4 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -19,10 +19,9 @@ class ABFE(): ---------- units : str The unit used for printing and plotting results. {'kcal/mol', 'kJ/mol', - 'kBT'} + 'kT'} software : str - The software used for generating input. {'Gromacs', 'amber', 'namd', - 'gomc'} + The software used for generating input. {'Gromacs', } dir : str Directory in which data files are stored. Default: './'. prefix : str @@ -35,11 +34,9 @@ class ABFE(): Discard data prior to this specified time as 'equilibration' data. Units picoseconds. Default: 0. uncorr : str - The observable to be used for the autocorrelation analysis; either - 'dhdl_all' (obtained as a sum over all energy components) or 'dhdl' - (obtained as a sum over those energy components that are changing) or - 'dE'. 
In the latter case the energy differences dE_{i,i+1} (dE_{i,i-1} - for the last lambda) are used. Default: None (skipping this step). + The observable to be used for the autocorrelation analysis; 'dhdl' + (obtained as a sum over those energy components that are changing). + Default: `dhdl` threshold : int Proceed with correlated samples if the number of uncorrelated samples is found to be less than this number. If 0 is given, the time series @@ -131,7 +128,7 @@ def __init__(self, units='kcal/mol', software='Gromacs', dir='./', self.logger.warning( 'Error reading read dhdl from {}.'.format(xvg)) - # # Sort the files according to the state + # Sort the files according to the state if len(u_nk_list) > 0: self.logger.info('Sort files according to the u_nk.') column_names = u_nk_list[0].columns.values.tolist() @@ -180,21 +177,21 @@ def update_units(self, units): ---------- units : str The unit used for printing and plotting results. {'kcal/mol', - 'kJ/mol', 'kBT'} + 'kJ/mol', 'kT'} Attributes ---------- scaling_factor : float - The scaling factor to change the unit from kBT to the selected unit. + The scaling factor to change the unit from kT to the selected unit. Note ---- - The internal representations are all in kBT. This function only changes + The internal representations are all in kT. This function only changes the unit when outputting text file or plotting the results. ''' if units is not None: self.logger.info('Set unit to {}.'.format(units)) - if units == 'kBT': + if units == 'kT': self.scaling_factor = 1 elif units == 'kJ/mol': self.scaling_factor = Boltzmann_constant * self.T * \ @@ -216,11 +213,9 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): Discard data prior to this specified time as 'equilibration' data. Units picoseconds. Default: 0. 
uncorr : str - The observable to be used for the autocorrelation analysis; either - 'dhdl_all' (obtained as a sum over all energy components) or 'dhdl' - (obtained as a sum over those energy components that are changing) - or 'dE'. In the latter case the energy differences dE_{i,i+1} - (dE_{i,i-1} for the last lambda) are used. Default: `dhdl` + The observable to be used for the autocorrelation analysis; 'dhdl' + (obtained as a sum over those energy components that are changing). + Default: `dhdl` threshold : int Proceed with correlated samples if the number of uncorrelated samples is found to be less than this number. If 0 is given, the @@ -256,7 +251,8 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): else: # Single key col = u_nk[key[0]] - subsample = statistical_inefficiency(u_nk, col) + subsample = statistical_inefficiency(u_nk, col, sort=True, + drop_duplicates=True) else: # pragma: no cover # The dhdl_all and dE will be implemented here when #48 is # merged @@ -569,7 +565,7 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', convergence : DataFrame The DataFrame with convergence data. :: - Forward (kBT) F. Error (kBT) Backward (kBT) B. Error (kBT) + Forward (kT) F. Error (kT) Backward (kT) B. 
Error (kT) 0 33.988935 0.334676 35.666128 0.324426 1 35.075489 0.232150 35.382850 0.230944 2 34.919988 0.190424 35.156028 0.189489 @@ -646,7 +642,7 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', forward_error_list.append(error) else: forward_error_list.append(result.d_delta_f_.iloc[0, -1]) - self.logger.info('{:.2f} +/- {:.2f} kBT'.format(forward_list[-1], + self.logger.info('{:.2f} +/- {:.2f} kT'.format(forward_list[-1], forward_error_list[-1])) self.logger.info('Begin backward analysis') @@ -671,13 +667,13 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', backward_error_list.append(error) else: backward_error_list.append(result.d_delta_f_.iloc[0, -1]) - self.logger.info('{:.2f} +/- {:.2f} kBT'.format(backward_list[-1], + self.logger.info('{:.2f} +/- {:.2f} kT'.format(backward_list[-1], backward_error_list[-1])) - convergence = pd.DataFrame({'Forward (kBT)': forward_list, - 'F. Error (kBT)': forward_error_list, - 'Backward (kBT)': backward_list, - 'B. Error (kBT)': backward_error_list}) + convergence = pd.DataFrame({'Forward (kT)': forward_list, + 'F. Error (kT)': forward_error_list, + 'Backward (kT)': backward_list, + 'B. Error (kT)': backward_error_list}) self.convergence = convergence self.logger.info('Plot convergence analysis to {} under {}.' 
From f4b736c967fcd4599ea7ba7c3319a565b38d5d8c Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Thu, 15 Apr 2021 10:03:34 +0100 Subject: [PATCH 021/123] dump coverage --- src/alchemlyb/workflows/abfe.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index c4fccab4..a6703733 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -202,6 +202,8 @@ def update_units(self, units): else: raise NameError('{} is not a valid unit.'.format(units)) self.units = units + else: # pragma: no cover + pass def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): '''Preprocess the data by removing the equilibration time and @@ -269,7 +271,10 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): self.logger.info('Take {} uncorrelated u_nk for state ' '{}.'.format(len(subsample), index)) self.u_nk_sample_list.append(subsample) + else: # pragma: no cover + self.logger.info('No u_nk data being subsampled') + if len(self.dHdl_list) > 0: self.dHdl_sample_list = [] for index, dHdl in enumerate(self.dHdl_list): dHdl = dHdl[dHdl.index.get_level_values('time') >= skiptime] @@ -284,6 +289,8 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): self.logger.info('Take {} uncorrelated dHdl for state ' '{}.'.format(len(subsample), index)) self.dHdl_sample_list.append(subsample) + else: # pragma: no cover + self.logger.info('No dHdl data being subsampled') def estimate(self, methods=('mbar', 'bar', 'ti')): '''Estimate the free energy using the selected estimator. 
@@ -632,6 +639,9 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', elif estimator.lower() == 'ti': for data in dHdl_list: sample.append(data[:len(data) // forwrev * i]) + else: # pragma: no cover + raise NameError( + '{} is not a valid estimator.'.format(estimator)) sample = pd.concat(sample) result = estimator_fit(sample) forward_list.append(result.delta_f_.iloc[0, -1]) @@ -657,6 +667,9 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', elif estimator.lower() == 'ti': for data in dHdl_list: sample.append(data[-len(data) // forwrev * i:]) + else: # pragma: no cover + raise NameError( + '{} is not a valid estimator.'.format(estimator)) sample = pd.concat(sample) result = estimator_fit(sample) backward_list.append(result.delta_f_.iloc[0, -1]) From 7dcce776d5665c260bc9ee0ec8f93f5ccffcea10 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Thu, 15 Apr 2021 10:13:46 +0100 Subject: [PATCH 022/123] typo --- docs/visualisation/alchemlyb.visualisation.plot_dF_state.rst | 2 +- docs/visualisation/alchemlyb.visualisation.plot_ti_dhdl.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/visualisation/alchemlyb.visualisation.plot_dF_state.rst b/docs/visualisation/alchemlyb.visualisation.plot_dF_state.rst index e4814efc..a96607be 100644 --- a/docs/visualisation/alchemlyb.visualisation.plot_dF_state.rst +++ b/docs/visualisation/alchemlyb.visualisation.plot_dF_state.rst @@ -25,7 +25,7 @@ The figure could be plotted in *portrait* or *landscape* mode by setting the The user could pass a list of strings to `labels` to name the :class:`~alchemlyb.estimators` or a list of strings to `colors` to color the estimators differently. The unit in the y axis could be labelled to other -units by setting `units`, which by default is :math:`kBT`. +units by setting `units`, which by default is :math:`kT`. Please check :ref:`How to plot dF states ` for a complete example. 
diff --git a/docs/visualisation/alchemlyb.visualisation.plot_ti_dhdl.rst b/docs/visualisation/alchemlyb.visualisation.plot_ti_dhdl.rst index af984f34..6f9844d7 100644 --- a/docs/visualisation/alchemlyb.visualisation.plot_ti_dhdl.rst +++ b/docs/visualisation/alchemlyb.visualisation.plot_ti_dhdl.rst @@ -11,7 +11,7 @@ When custom labels are desirable, the user could pass a list of strings to the *labels* for labelling each alchemical transformation differently. The color of each alchemical transformation could also be set by passing a list of color string to the *colors*. The unit in the y axis could be labelled to other units -by setting *units*, which by default is :math:`kBT`. The user can pass +by setting *units*, which by default is :math:`kT`. The user can pass :class:`matplotlib.axes.Axes` into the function to have the dhdl drawn on a specific axes. From a92659780a7bc62653a785c2238d99385bf7897a Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 10 Jul 2021 10:26:14 +0100 Subject: [PATCH 023/123] update --- src/alchemlyb/constants.py | 7 ------- src/alchemlyb/estimators/ti_.py | 8 ++++++++ 2 files changed, 8 insertions(+), 7 deletions(-) delete mode 100644 src/alchemlyb/constants.py diff --git a/src/alchemlyb/constants.py b/src/alchemlyb/constants.py deleted file mode 100644 index 459e5ea8..00000000 --- a/src/alchemlyb/constants.py +++ /dev/null @@ -1,7 +0,0 @@ -'''Physical and mathematical constants and units.''' -# Taken from scipy.constants since py2 doesn't support it -#: Boltzmann's constant :math:`k_B` in kJ/(mol K); value from `NIST CODATA: k`_. -Boltzmann_constant = 1.380649e-23 -#: Avogadro constant :math:`N_A` in 1/mol; value from `NIST CODATA: k`_. 
-Avogadro_constant = 6.02214076e+23 -kJ2kcal = 0.239006 \ No newline at end of file diff --git a/src/alchemlyb/estimators/ti_.py b/src/alchemlyb/estimators/ti_.py index 0344a19f..731c7369 100644 --- a/src/alchemlyb/estimators/ti_.py +++ b/src/alchemlyb/estimators/ti_.py @@ -61,6 +61,8 @@ def fit(self, dHdl): l_types = dHdl.index.names[1:] # obtain vector of delta lambdas between each state + # Fix issue #148, where for pandas == 1.3.0 + # dl = means.reset_index()[list(means.index.names[:])].diff().iloc[1:].values dl = means.reset_index()[means.index.names[:]].diff().iloc[1:].values # apply trapezoid rule to obtain DF between each adjacent state @@ -104,6 +106,10 @@ def fit(self, dHdl): self.states_ = means.index.values.tolist() + self.delta_f_.attrs = dHdl.attrs + self.d_delta_f_.attrs = dHdl.attrs + self.dhdl.attrs = dHdl.attrs + return self def separate_dhdl(self): @@ -129,6 +135,8 @@ def separate_dhdl(self): # get the lambda names l_types = self.dhdl.index.names # obtain bool of changed lambdas between each state + # Fix issue #148, where for pandas == 1.3.0 + # lambdas = self.dhdl.reset_index()[list(l_types)] lambdas = self.dhdl.reset_index()[l_types] diff = lambdas.diff().to_numpy(dtype='bool') # diff will give the first row as NaN so need to fix that From cf142a1f6a483f4524833b6e75d6ed008c16b021 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 10 Jul 2021 10:34:40 +0100 Subject: [PATCH 024/123] revert change --- .../alchemlyb.visualisation.plot_dF_state.rst | 2 +- .../alchemlyb.visualisation.plot_ti_dhdl.rst | 2 +- src/alchemlyb/estimators/ti_.py | 8 --- src/alchemlyb/visualisation/dF_state.py | 13 ++--- src/alchemlyb/visualisation/mbar_matrix.py | 4 +- src/alchemlyb/visualisation/ti_dhdl.py | 52 +++++++------------ 6 files changed, 29 insertions(+), 52 deletions(-) diff --git a/docs/visualisation/alchemlyb.visualisation.plot_dF_state.rst b/docs/visualisation/alchemlyb.visualisation.plot_dF_state.rst index a96607be..d13c1e32 100644 --- 
a/docs/visualisation/alchemlyb.visualisation.plot_dF_state.rst +++ b/docs/visualisation/alchemlyb.visualisation.plot_dF_state.rst @@ -25,7 +25,7 @@ The figure could be plotted in *portrait* or *landscape* mode by setting the The user could pass a list of strings to `labels` to name the :class:`~alchemlyb.estimators` or a list of strings to `colors` to color the estimators differently. The unit in the y axis could be labelled to other -units by setting `units`, which by default is :math:`kT`. +units by setting `units`, which by default is kcal/mol. Please check :ref:`How to plot dF states ` for a complete example. diff --git a/docs/visualisation/alchemlyb.visualisation.plot_ti_dhdl.rst b/docs/visualisation/alchemlyb.visualisation.plot_ti_dhdl.rst index 6f9844d7..d4247515 100644 --- a/docs/visualisation/alchemlyb.visualisation.plot_ti_dhdl.rst +++ b/docs/visualisation/alchemlyb.visualisation.plot_ti_dhdl.rst @@ -11,7 +11,7 @@ When custom labels are desirable, the user could pass a list of strings to the *labels* for labelling each alchemical transformation differently. The color of each alchemical transformation could also be set by passing a list of color string to the *colors*. The unit in the y axis could be labelled to other units -by setting *units*, which by default is :math:`kT`. The user can pass +by setting *units*, which by default is kcal/mol. The user can pass :class:`matplotlib.axes.Axes` into the function to have the dhdl drawn on a specific axes. 
diff --git a/src/alchemlyb/estimators/ti_.py b/src/alchemlyb/estimators/ti_.py index 731c7369..0344a19f 100644 --- a/src/alchemlyb/estimators/ti_.py +++ b/src/alchemlyb/estimators/ti_.py @@ -61,8 +61,6 @@ def fit(self, dHdl): l_types = dHdl.index.names[1:] # obtain vector of delta lambdas between each state - # Fix issue #148, where for pandas == 1.3.0 - # dl = means.reset_index()[list(means.index.names[:])].diff().iloc[1:].values dl = means.reset_index()[means.index.names[:]].diff().iloc[1:].values # apply trapezoid rule to obtain DF between each adjacent state @@ -106,10 +104,6 @@ def fit(self, dHdl): self.states_ = means.index.values.tolist() - self.delta_f_.attrs = dHdl.attrs - self.d_delta_f_.attrs = dHdl.attrs - self.dhdl.attrs = dHdl.attrs - return self def separate_dhdl(self): @@ -135,8 +129,6 @@ def separate_dhdl(self): # get the lambda names l_types = self.dhdl.index.names # obtain bool of changed lambdas between each state - # Fix issue #148, where for pandas == 1.3.0 - # lambdas = self.dhdl.reset_index()[list(l_types)] lambdas = self.dhdl.reset_index()[l_types] diff = lambdas.diff().to_numpy(dtype='bool') # diff will give the first row as NaN so need to fix that diff --git a/src/alchemlyb/visualisation/dF_state.py b/src/alchemlyb/visualisation/dF_state.py index 89f96357..3f65f44e 100644 --- a/src/alchemlyb/visualisation/dF_state.py +++ b/src/alchemlyb/visualisation/dF_state.py @@ -14,8 +14,8 @@ from ..estimators import TI, BAR, MBAR -def plot_dF_state(estimators, labels=None, colors=None, units='kT', - scaling_factor=1, orientation='portrait', nb=10): +def plot_dF_state(estimators, labels=None, colors=None, units='kcal/mol', + orientation='portrait', nb=10): '''Plot the dhdl of TI. Parameters @@ -30,10 +30,7 @@ def plot_dF_state(estimators, labels=None, colors=None, units='kT', colors : List list of colors for plotting different estimators. units : str - The unit of the estimate. 
Default: `kT` - scaling_factor : float - The scaling factor to change the energy from :math:`kT` to the - desired unit. + The unit of the estimate. Default: 'kcal/mol' orientation : string The orientation of the figure. Can be `portrait` or `landscape` nb : int @@ -44,8 +41,8 @@ def plot_dF_state(estimators, labels=None, colors=None, units='kT', matplotlib.figure.Figure An Figure with the dF states drawn. - Note - ---- + Notes + ----- The code is taken and modified from : `Alchemical Analysis `_ diff --git a/src/alchemlyb/visualisation/mbar_matrix.py b/src/alchemlyb/visualisation/mbar_matrix.py index 3b5e9204..682b8555 100644 --- a/src/alchemlyb/visualisation/mbar_matrix.py +++ b/src/alchemlyb/visualisation/mbar_matrix.py @@ -34,8 +34,8 @@ def plot_mbar_overlap_matrix(matrix, skip_lambda_index=[], ax=None): matplotlib.axes.Axes An axes with the overlap matrix drawn. - Note - ---- + Notes + ----- The code is taken and modified from : `Alchemical Analysis `_ diff --git a/src/alchemlyb/visualisation/ti_dhdl.py b/src/alchemlyb/visualisation/ti_dhdl.py index b948d0af..f052c596 100644 --- a/src/alchemlyb/visualisation/ti_dhdl.py +++ b/src/alchemlyb/visualisation/ti_dhdl.py @@ -14,8 +14,7 @@ from matplotlib.font_manager import FontProperties as FP import numpy as np -def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kT', - scaling_factor=1, ax=None): +def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kcal/mol', ax=None): '''Plot the dhdl of TI. Parameters @@ -29,10 +28,7 @@ def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kT', list of colors for plotting all the alchemical transformations. Default: ['r', 'g', '#7F38EC', '#9F000F', 'b', 'y'] units : str - The label for the unit of the estimate. Default: `kT` - scaling_factor : float - The scaling factor to change the energy from :math:`kT` to the - desired unit. + The unit of the estimate. Default: 'kcal/mol' ax : matplotlib.axes.Axes Matplotlib axes object where the plot will be drawn on. 
If ax=None, a new axes will be generated. @@ -42,26 +38,17 @@ def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kT', matplotlib.axes.Axes An axes with the TI dhdl drawn. - Note - ---- + Notes + ----- The code is taken and modified from : `Alchemical Analysis `_ - The units variable is for labelling only. Changing it doesn't change the - unit of the underlying variable, which is in the unit of kT. The - scaling_factor is used to change the number to the desired unit. - ''' - # Fix unit - - # Make it into a list - if not isinstance(dhdl_data, list): - dhdl_list = dhdl_data.separate_dhdl() - else: - dhdl_list = [] - for dhdl in dhdl_data: - dhdl_list.extend(dhdl.separate_dhdl()) + try: + len(dhdl_data) + except TypeError: + dhdl_data = [dhdl_data, ] if ax is None: fig, ax = plt.subplots(figsize=(8, 6)) @@ -78,33 +65,33 @@ def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kT', # Make level names if labels is None: lv_names2 = [] - for dhdl in dhdl_list: + for dhdl in dhdl_data: # Assume that the dhdl has only one columns - lv_names2.append(dhdl.name.capitalize()) + lv_names2.append(dhdl.dhdl.columns.values[0].capitalize()) else: - if len(labels) == len(dhdl_list): + if len(labels) == len(dhdl_data): lv_names2 = labels else: # pragma: no cover raise ValueError( 'Length of labels ({}) should be the same as the number of data ({})'.format( - len(labels), len(dhdl_list))) + len(labels), len(dhdl_data))) if colors is None: colors = ['r', 'g', '#7F38EC', '#9F000F', 'b', 'y'] else: - if len(colors) >= len(dhdl_list): + if len(colors) >= len(dhdl_data): pass else: # pragma: no cover raise ValueError( 'Number of colors ({}) should be larger than the number of data ({})'.format( - len(labels), len(dhdl_list))) - + len(labels), len(dhdl_data))) + # Get the real data out xs, ndx, dx = [0], 0, 0.001 min_y, max_y = 0, 0 - for dhdl in dhdl_list: - x = dhdl.index.values - y = dhdl.values.ravel() * scaling_factor + for dhdl in dhdl_data: + x = 
dhdl.dhdl.index.values + y = dhdl.dhdl.values.ravel() min_y = min(y.min(), min_y) max_y = max(y.max(), max_y) @@ -165,7 +152,8 @@ def getInd(r=ri, z=[0]): for i, j in zip(xs[1:], xt[1:]): ax.annotate( ('%.2f' % (i - 1.0 if i > 1.0 else i) if not j == '' else ''), - xy=(i, 0), size=10, rotation=90, va='bottom', ha='center', + xy=(i, 0), xytext=(i, 0.01), size=10, rotation=90, + textcoords=('data', 'axes fraction'), va='bottom', ha='center', color='#151B54') if ndx > 1: lenticks = len(ax.get_ymajorticklabels()) - 1 From ce1539a479dd00085883418a7e445fec8fffdcc3 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 10 Jul 2021 10:36:01 +0100 Subject: [PATCH 025/123] update --- src/alchemlyb/visualisation/dF_state.py | 4 ---- src/alchemlyb/visualisation/ti_dhdl.py | 1 - 2 files changed, 5 deletions(-) diff --git a/src/alchemlyb/visualisation/dF_state.py b/src/alchemlyb/visualisation/dF_state.py index 3f65f44e..569df9b7 100644 --- a/src/alchemlyb/visualisation/dF_state.py +++ b/src/alchemlyb/visualisation/dF_state.py @@ -46,10 +46,6 @@ def plot_dF_state(estimators, labels=None, colors=None, units='kcal/mol', The code is taken and modified from : `Alchemical Analysis `_ - The units variable is for labelling only. Changing it doesn't change the - unit of the underlying variable, which is in the unit of kT. The - scaling_factor is used to change the number to the desired unit. 
- ''' try: len(estimators) diff --git a/src/alchemlyb/visualisation/ti_dhdl.py b/src/alchemlyb/visualisation/ti_dhdl.py index f052c596..3512d86a 100644 --- a/src/alchemlyb/visualisation/ti_dhdl.py +++ b/src/alchemlyb/visualisation/ti_dhdl.py @@ -85,7 +85,6 @@ def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kcal/mol', ax=None) raise ValueError( 'Number of colors ({}) should be larger than the number of data ({})'.format( len(labels), len(dhdl_data))) - # Get the real data out xs, ndx, dx = [0], 0, 0.001 min_y, max_y = 0, 0 From c5724e12080f95191b8d7f7a5a9ae651acde9b9d Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 10 Jul 2021 10:36:41 +0100 Subject: [PATCH 026/123] Update ti_dhdl.py --- src/alchemlyb/visualisation/ti_dhdl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/alchemlyb/visualisation/ti_dhdl.py b/src/alchemlyb/visualisation/ti_dhdl.py index 3512d86a..7b53217b 100644 --- a/src/alchemlyb/visualisation/ti_dhdl.py +++ b/src/alchemlyb/visualisation/ti_dhdl.py @@ -85,6 +85,7 @@ def plot_ti_dhdl(dhdl_data, labels=None, colors=None, units='kcal/mol', ax=None) raise ValueError( 'Number of colors ({}) should be larger than the number of data ({})'.format( len(labels), len(dhdl_data))) + # Get the real data out xs, ndx, dx = [0], 0, 0.001 min_y, max_y = 0, 0 From 1a9722815181d1514e2792aefc8e05dce9983a35 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 10 Jul 2021 15:12:07 +0100 Subject: [PATCH 027/123] Update abfe.py --- src/alchemlyb/workflows/abfe.py | 46 +++++++++++++-------------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index a6703733..c800cd2f 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -9,7 +9,7 @@ from ..estimators import MBAR, BAR, TI from ..visualisation import (plot_mbar_overlap_matrix, plot_ti_dhdl, plot_dF_state, plot_convergence) -from ..constants import Boltzmann_constant, Avogadro_constant, 
kJ2kcal +from ..postprocessors.units import get_unit_converter class ABFE(): @@ -191,16 +191,6 @@ def update_units(self, units): ''' if units is not None: self.logger.info('Set unit to {}.'.format(units)) - if units == 'kT': - self.scaling_factor = 1 - elif units == 'kJ/mol': - self.scaling_factor = Boltzmann_constant * self.T * \ - Avogadro_constant / 1000 - elif units == 'kcal/mol': - self.scaling_factor = Boltzmann_constant * self.T * \ - Avogadro_constant / 1000 * kJ2kcal - else: - raise NameError('{} is not a valid unit.'.format(units)) self.units = units else: # pragma: no cover pass @@ -389,6 +379,7 @@ def write(self, resultfilename='result.out'): result_out.append([stage.split('-')[0][:9].rjust(9)+': ', ]) result_out.append(['TOTAL'.rjust(9) + ': ', ]) + converter = get_unit_converter(self.units) for estimator_name, estimator in self.estimator.items(): self.logger.info('write the result from estimator {}'.format( estimator_name)) @@ -399,8 +390,8 @@ def write(self, resultfilename='result.out'): result_out[2].append('---------------------') for index in range(1, num_states): result_out[2+index].append('{:.3f} +- {:.3f}'.format( - estimator.delta_f_.iloc[index-1, index]*self.scaling_factor, - estimator.d_delta_f_.iloc[index-1, index]*self.scaling_factor + converter(estimator.delta_f_.iloc[index-1, index]), + converter(estimator.d_delta_f_.iloc[index-1, index]) ).rjust(21)) result_out[2+num_states].append('---------------------') @@ -430,24 +421,24 @@ def write(self, resultfilename='result.out'): self.logger.info( 'Stage {} is from state {} to state {}.'.format( stage, start, end)) - result = estimator.delta_f_.iloc[start, end]*self.scaling_factor + result = converter(estimator.delta_f_.iloc[start, end]) if estimator_name != 'bar': - error = estimator.d_delta_f_.iloc[start, end]*self.scaling_factor + error = converter(estimator.d_delta_f_.iloc[start, end]) else: - error = np.sqrt(sum( + error = converter(np.sqrt(sum( [estimator.d_delta_f_.iloc[start, 
start+1]**2 - for i in range(start, end + 1)])) * self.scaling_factor + for i in range(start, end + 1)]))) result_out[3 + num_states + index].append( '{:.3f} +- {:.3f}'.format(result, error,).rjust(21)) # Total result - result = estimator.delta_f_.iloc[0, -1] * self.scaling_factor + result = converter(estimator.delta_f_.iloc[0, -1]) if estimator_name != 'bar': - error = estimator.d_delta_f_.iloc[0, -1] * self.scaling_factor + error = converter(estimator.d_delta_f_.iloc[0, -1]) else: - error = np.sqrt(sum( + error = converter(np.sqrt(sum( [estimator.d_delta_f_.iloc[i, i + 1] ** 2 - for i in range(num_states - 1)])) * self.scaling_factor + for i in range(num_states - 1)]))) result_out[3 + num_states + len(stages)].append( '{:.3f} +- {:.3f}'.format(result, error, ).rjust(21)) self.logger.info('Write results:\n'+ @@ -510,8 +501,7 @@ def plot_ti_dhdl(self, dhdl_TI='dhdl_TI.pdf', labels=None, colors=None, self.logger.info('Plot TI dHdl.') if 'ti' in self.estimator: ax = plot_ti_dhdl(self.estimator['ti'], units=self.units, - labels=labels, colors=colors, ax=ax, - scaling_factor=self.scaling_factor) + labels=labels, colors=colors, ax=ax) ax.figure.savefig(join(self.out, dhdl_TI)) self.logger.info('Plot TI dHdl to {} under {}.' ''.format(dhdl_TI, self.out)) @@ -542,7 +532,6 @@ def plot_dF_state(self, dF_state='dF_state.pdf', labels=None, colors=None, self.logger.info('Plot dF states.') fig = plot_dF_state(self.estimator.values(), labels=labels, colors=colors, units=self.units, - scaling_factor=self.scaling_factor, orientation=orientation, nb=nb) fig.savefig(join(self.out, dF_state)) self.logger.info('Plot dF state to {} under {}.' @@ -691,10 +680,11 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', self.convergence = convergence self.logger.info('Plot convergence analysis to {} under {}.' 
''.format(dF_t, self.out)) - ax = plot_convergence(np.array(forward_list) * self.scaling_factor, - np.array(forward_error_list) * self.scaling_factor, - np.array(backward_list) * self.scaling_factor, - np.array(backward_error_list) * self.scaling_factor, + # converter = get_unit_converter(self.units) + ax = plot_convergence(np.array(forward_list), + np.array(forward_error_list), + np.array(backward_list), + np.array(backward_error_list), units=self.units, ax=ax) ax.figure.savefig(join(self.out, dF_t)) return ax From 4d5425bcb6a4f242afcdd33e6b27515931321eb2 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 10 Jul 2021 16:03:17 +0100 Subject: [PATCH 028/123] update to pass the test --- src/alchemlyb/__init__.py | 6 ++- src/alchemlyb/estimators/ti_.py | 1 + src/alchemlyb/tests/test_workflow.py | 10 ---- src/alchemlyb/workflows/abfe.py | 68 ++++++++++++++++------------ 4 files changed, 45 insertions(+), 40 deletions(-) diff --git a/src/alchemlyb/__init__.py b/src/alchemlyb/__init__.py index 363a4f4c..061e2fe2 100644 --- a/src/alchemlyb/__init__.py +++ b/src/alchemlyb/__init__.py @@ -47,7 +47,11 @@ def concat(objs, *args, **kwargs): .. 
versionadded:: 0.5.0''' # Sanity check - attrs = objs[0].attrs + try: + attrs = objs[0].attrs + except IndexError: # except empty list as input + raise ValueError('No objects to concatenate') + for obj in objs: if attrs != obj.attrs: raise ValueError('All pandas objects should have the same attrs.') diff --git a/src/alchemlyb/estimators/ti_.py b/src/alchemlyb/estimators/ti_.py index 731c7369..e7c8eaee 100644 --- a/src/alchemlyb/estimators/ti_.py +++ b/src/alchemlyb/estimators/ti_.py @@ -150,6 +150,7 @@ def separate_dhdl(self): for l in l_types: if l != l_types[i]: new = new.reset_index(l, drop=True) + new.attrs = self.dhdl.attrs dhdl_list.append(new) return dhdl_list diff --git a/src/alchemlyb/tests/test_workflow.py b/src/alchemlyb/tests/test_workflow.py index e0366839..c9acd6cd 100644 --- a/src/alchemlyb/tests/test_workflow.py +++ b/src/alchemlyb/tests/test_workflow.py @@ -291,16 +291,6 @@ def workflow(): suffix='bz2', T=310) return workflow - def test_change_unit(self, workflow): - workflow.update_units('kT') - assert workflow.scaling_factor == 1 - workflow.update_units('kcal/mol') - assert np.isclose(workflow.scaling_factor, 0.6, atol=0.1) - workflow.update_units('kJ/mol') - assert np.isclose(workflow.scaling_factor, 2.6, atol=0.1) - with pytest.raises(NameError): - workflow.update_units('aaa') - def test_uncorr_threshold(self, workflow): original_u_nk = workflow.u_nk_list original_dHdl = workflow.dHdl_list diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index c800cd2f..0a1dd9c6 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -10,6 +10,7 @@ from ..visualisation import (plot_mbar_overlap_matrix, plot_ti_dhdl, plot_dF_state, plot_convergence) from ..postprocessors.units import get_unit_converter +from .. import concat class ABFE(): @@ -307,18 +308,18 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): # Use unprocessed data if preprocess is not performed. 
if 'ti' in methods: try: - dHdl = pd.concat(self.dHdl_sample_list) + dHdl = concat(self.dHdl_sample_list) except (AttributeError, ValueError): - dHdl = pd.concat(self.dHdl_list) + dHdl = concat(self.dHdl_list) self.logger.warning('dHdl has not been preprocessed.') self.logger.info( 'A total {} lines of dHdl is used.'.format(len(dHdl))) if 'bar' in methods or 'mbar' in methods: try: - u_nk = pd.concat(self.u_nk_sample_list) + u_nk = concat(self.u_nk_sample_list) except (AttributeError, ValueError): - u_nk = pd.concat(self.u_nk_list) + u_nk = concat(self.u_nk_list) self.logger.warning('u_nk has not been preprocessed.') self.logger.info( 'A total {} lines of u_nk is used.'.format(len(u_nk))) @@ -383,6 +384,10 @@ def write(self, resultfilename='result.out'): for estimator_name, estimator in self.estimator.items(): self.logger.info('write the result from estimator {}'.format( estimator_name)) + + # Do the unit conversion + delta_f_ = converter(estimator.delta_f_) + d_delta_f_ = converter(estimator.d_delta_f_) # Write the estimator header result_out[0].append('---------------------') result_out[1].append('{} ({}) '.format( @@ -390,8 +395,8 @@ def write(self, resultfilename='result.out'): result_out[2].append('---------------------') for index in range(1, num_states): result_out[2+index].append('{:.3f} +- {:.3f}'.format( - converter(estimator.delta_f_.iloc[index-1, index]), - converter(estimator.d_delta_f_.iloc[index-1, index]) + delta_f_.iloc[index-1, index], + d_delta_f_.iloc[index-1, index] ).rjust(21)) result_out[2+num_states].append('---------------------') @@ -421,24 +426,24 @@ def write(self, resultfilename='result.out'): self.logger.info( 'Stage {} is from state {} to state {}.'.format( stage, start, end)) - result = converter(estimator.delta_f_.iloc[start, end]) + result = delta_f_.iloc[start, end] if estimator_name != 'bar': - error = converter(estimator.d_delta_f_.iloc[start, end]) + error = d_delta_f_.iloc[start, end] else: - error = converter(np.sqrt(sum( - 
[estimator.d_delta_f_.iloc[start, start+1]**2 - for i in range(start, end + 1)]))) + error = np.sqrt(sum( + [d_delta_f_.iloc[start, start+1]**2 + for i in range(start, end + 1)])) result_out[3 + num_states + index].append( '{:.3f} +- {:.3f}'.format(result, error,).rjust(21)) # Total result - result = converter(estimator.delta_f_.iloc[0, -1]) + result = delta_f_.iloc[0, -1] if estimator_name != 'bar': - error = converter(estimator.d_delta_f_.iloc[0, -1]) + error = d_delta_f_.iloc[0, -1] else: - error = converter(np.sqrt(sum( - [estimator.d_delta_f_.iloc[i, i + 1] ** 2 - for i in range(num_states - 1)]))) + error = np.sqrt(sum( + [d_delta_f_.iloc[i, i + 1] ** 2 + for i in range(num_states - 1)])) result_out[3 + num_states + len(stages)].append( '{:.3f} +- {:.3f}'.format(result, error, ).rjust(21)) self.logger.info('Write results:\n'+ @@ -616,6 +621,8 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', self.logger.warning( '{} is not a valid estimator.'.format(estimator)) + converter = get_unit_converter(self.units) + self.logger.info('Begin forward analysis') forward_list = [] forward_error_list = [] @@ -631,16 +638,17 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', else: # pragma: no cover raise NameError( '{} is not a valid estimator.'.format(estimator)) - sample = pd.concat(sample) + sample = concat(sample) result = estimator_fit(sample) - forward_list.append(result.delta_f_.iloc[0, -1]) + forward_list.append(converter(result.delta_f_).iloc[0, -1]) if estimator.lower() == 'bar': error = np.sqrt(sum( - [result.d_delta_f_.iloc[i, i + 1] ** 2 + [converter(result.d_delta_f_).iloc[i, i + 1] ** 2 for i in range(len(result.d_delta_f_) - 1)])) forward_error_list.append(error) else: - forward_error_list.append(result.d_delta_f_.iloc[0, -1]) + forward_error_list.append(converter(result.d_delta_f_).iloc[ + 0, -1]) self.logger.info('{:.2f} +/- {:.2f} kT'.format(forward_list[-1], forward_error_list[-1])) @@ -659,28 +667,30 @@ 
def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', else: # pragma: no cover raise NameError( '{} is not a valid estimator.'.format(estimator)) - sample = pd.concat(sample) + sample = concat(sample) result = estimator_fit(sample) - backward_list.append(result.delta_f_.iloc[0, -1]) + backward_list.append(converter(result.delta_f_).iloc[0, -1]) if estimator.lower() == 'bar': error = np.sqrt(sum( - [result.d_delta_f_.iloc[i, i + 1] ** 2 + [converter(result.d_delta_f_).iloc[i, i + 1] ** 2 for i in range(len(result.d_delta_f_) - 1)])) backward_error_list.append(error) else: - backward_error_list.append(result.d_delta_f_.iloc[0, -1]) + backward_error_list.append(converter( + result.d_delta_f_).iloc[0, -1]) self.logger.info('{:.2f} +/- {:.2f} kT'.format(backward_list[-1], backward_error_list[-1])) - convergence = pd.DataFrame({'Forward (kT)': forward_list, - 'F. Error (kT)': forward_error_list, - 'Backward (kT)': backward_list, - 'B. Error (kT)': backward_error_list}) + convergence = pd.DataFrame( + {'Forward ({})'.format(self.units): forward_list, + 'F. Error ({})'.format(self.units): forward_error_list, + 'Backward ({})'.format(self.units): backward_list, + 'B. Error ({})'.format(self.units): backward_error_list}) self.convergence = convergence self.logger.info('Plot convergence analysis to {} under {}.' 
''.format(dF_t, self.out)) - # converter = get_unit_converter(self.units) + ax = plot_convergence(np.array(forward_list), np.array(forward_error_list), np.array(backward_list), From ef529527ccf1b779ab3b065f62a41c1337f4f030 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Tue, 13 Jul 2021 18:58:34 +0100 Subject: [PATCH 029/123] Update mbar_.py --- src/alchemlyb/estimators/mbar_.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py index c7e95b0d..ad8b75cc 100644 --- a/src/alchemlyb/estimators/mbar_.py +++ b/src/alchemlyb/estimators/mbar_.py @@ -48,9 +48,7 @@ class MBAR(BaseEstimator): """ def __init__(self, maximum_iterations=10000, relative_tolerance=1.0e-7, - initial_f_k=None, method='adaptive', verbose=False): - # method='adaptive' is used as it is more stable - # https://github.com/choderalab/pymbar/issues/419 + initial_f_k=None, method='hybr', verbose=False): self.maximum_iterations = maximum_iterations self.relative_tolerance = relative_tolerance From 9d2eb067199c2cf05690e39e4cd09a43af0943ec Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 2 Aug 2021 11:09:37 +0100 Subject: [PATCH 030/123] update --- docs/index.rst | 2 +- docs/workflow.rst | 9 +++- ...test_workflow.py => test_workflow_ABFE.py} | 0 src/alchemlyb/workflows/abfe.py | 41 ++++++++++++++++--- 4 files changed, 43 insertions(+), 9 deletions(-) rename src/alchemlyb/tests/{test_workflow.py => test_workflow_ABFE.py} (100%) diff --git a/docs/index.rst b/docs/index.rst index 8d91f9b0..3887f1c0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -66,12 +66,12 @@ Contributions are very welcome. If you have bug reports or feature requests or q :caption: User Documentation install - workflow parsing preprocessing estimators postprocessing visualisation + workflow .. 
toctree:: :maxdepth: 1 diff --git a/docs/workflow.rst b/docs/workflow.rst index 2a68f5d2..f6c470d9 100644 --- a/docs/workflow.rst +++ b/docs/workflow.rst @@ -2,13 +2,17 @@ Automatic workflow ================== Though **alchemlyb** is a library offering great flexibility in deriving free energy estimate, it also provide a easy pipeline that is similar to -`Alchemical Analysis `_ and a +`Alchemical Analysis `_ and a step-by-step version that allows more flexibility. +Note +---- +This is an experimental feature and is not API stable. + Fully Automatic analysis ------------------------ A interface similar to -`Alchemical Analysis `_ +`Alchemical Analysis `_ could be excuted with a single line of command. :: >>> import os @@ -107,3 +111,4 @@ to the data generated at each stage of the analysis. :: .. autofunction:: plot_dF_state .. autofunction:: check_convergence +.. _Alchemical Analysis: https://github.com/MobleyLab/alchemical-analysis \ No newline at end of file diff --git a/src/alchemlyb/tests/test_workflow.py b/src/alchemlyb/tests/test_workflow_ABFE.py similarity index 100% rename from src/alchemlyb/tests/test_workflow.py rename to src/alchemlyb/tests/test_workflow_ABFE.py diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 0a1dd9c6..c8d89cdf 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -1,3 +1,4 @@ +import os from os.path import join from glob import glob import pandas as pd @@ -11,6 +12,7 @@ plot_dF_state, plot_convergence) from ..postprocessors.units import get_unit_converter from .. import concat +from .. import __version__ class ABFE(): @@ -24,7 +26,7 @@ class ABFE(): software : str The software used for generating input. {'Gromacs', } dir : str - Directory in which data files are stored. Default: './'. + Directory in which data files are stored. Default: os.path.curdir. prefix : str Prefix for datafile sets. Default: 'dhdl'. 
suffix : str @@ -46,7 +48,7 @@ class ABFE(): A list of the methods to esitimate the free energy with. Default: None. out : str Directory in which the output files produced by this script will be - stored. Default: './'. + stored. Default: os.path.curdir. resultfilename : str custom defined result filename. Default: None. (not writing the result) overlap : str @@ -62,7 +64,9 @@ class ABFE(): points (an integer) must be provided. Default: None. (not doing convergence analysis). log : str - The filename of the log file. Default: 'result.log' + The filename of the log file. The workflow logs under + alchemlyb.workflows.ABFE. Default: + 'result.log' Attributes ---------- @@ -75,15 +79,16 @@ class ABFE(): dHdl_list : list The list of dHdl read from the files. ''' - def __init__(self, units='kcal/mol', software='Gromacs', dir='./', + def __init__(self, units='kcal/mol', software='Gromacs', dir=os.path.curdir, prefix='dhdl', suffix='xvg', T=298, skiptime=0, uncorr=None, - threshold=50, methods=None, out='./', resultfilename=None, + threshold=50, methods=None, out=os.path.curdir, resultfilename=None, overlap=None, breakdown=None, forwrev=None, log='result.log'): logging.basicConfig(filename=log, level=logging.INFO) self.logger = logging.getLogger('alchemlyb.workflows.ABFE') self.logger.info('Initialise Alchemlyb ABFE Workflow') + self.logger.info('Alchemlyb Version: {}'.format(__version__)) self.logger.info('Set temperature to {} K.'.format(T)) self.T = T @@ -246,6 +251,28 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): col = u_nk[key[0]] subsample = statistical_inefficiency(u_nk, col, sort=True, drop_duplicates=True) + # This part is commented out as it duplicates #98 + # The user could restore this part if it is desired. 
+ + # elif uncorr == 'dhdl_all': + # subsample = statistical_inefficiency(u_nk, u_nk.sum(axis=1), + # sort = True, + # drop_duplicates = True) + # elif uncorr == 'dE': + # # Using the same logic as alchemical-analysis + # key = u_nk.index.values[0][1:] + # index = u_nk.columns.values.tolist().index(key) + # # for the state that is not the last state, take the state+1 + # if index + 1 < len(u_nk.columns): + # subsample = statistical_inefficiency( + # u_nk, u_nk.iloc[:, index + 1]) + # # for the state that is the last state, take the state-1 + # else: + # subsample = statistical_inefficiency( + # u_nk, u_nk.iloc[:, index - 1], + # sort = True, + # drop_duplicates = True) + else: # pragma: no cover # The dhdl_all and dE will be implemented here when #48 is # merged @@ -269,7 +296,9 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): self.dHdl_sample_list = [] for index, dHdl in enumerate(self.dHdl_list): dHdl = dHdl[dHdl.index.get_level_values('time') >= skiptime] - subsample = statistical_inefficiency(dHdl, dHdl.sum(axis=1)) + subsample = statistical_inefficiency(dHdl, dHdl.sum(axis=1), + sort=True, + drop_duplicates=True) if len(subsample) < threshold: self.logger.warning('Number of dHdl {} for state {} is ' 'less than the threshold {}.'.format( From ab1ab76633d11b8126e38c773ec15f7954501384 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 2 Aug 2021 11:30:44 +0100 Subject: [PATCH 031/123] Update abfe.py --- src/alchemlyb/workflows/abfe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index c8d89cdf..94e82ee4 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -22,7 +22,7 @@ class ABFE(): ---------- units : str The unit used for printing and plotting results. {'kcal/mol', 'kJ/mol', - 'kT'} + 'kT'}. Default: 'kT'. software : str The software used for generating input. 
{'Gromacs', } dir : str @@ -79,7 +79,7 @@ class ABFE(): dHdl_list : list The list of dHdl read from the files. ''' - def __init__(self, units='kcal/mol', software='Gromacs', dir=os.path.curdir, + def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, prefix='dhdl', suffix='xvg', T=298, skiptime=0, uncorr=None, threshold=50, methods=None, out=os.path.curdir, resultfilename=None, overlap=None, breakdown=None, forwrev=None, From 10fbdc46f8dea88a1b30103fb7d27028765507ad Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 2 Aug 2021 11:33:12 +0100 Subject: [PATCH 032/123] Update mbar_.py --- src/alchemlyb/estimators/mbar_.py | 72 ++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py index ad8b75cc..8d4d8719 100644 --- a/src/alchemlyb/estimators/mbar_.py +++ b/src/alchemlyb/estimators/mbar_.py @@ -4,6 +4,8 @@ from sklearn.base import BaseEstimator from pymbar import MBAR as MBAR_ +import pymbar +from pymbar.utils import ParameterError class MBAR(BaseEstimator): @@ -48,12 +50,15 @@ class MBAR(BaseEstimator): """ def __init__(self, maximum_iterations=10000, relative_tolerance=1.0e-7, - initial_f_k=None, method='hybr', verbose=False): + initial_f_k=None, method=None, verbose=False): self.maximum_iterations = maximum_iterations self.relative_tolerance = relative_tolerance self.initial_f_k = initial_f_k - self.method = [dict(method=method)] + if method is None: + self.method = None + else: + self.method = [dict(method=method)] self.verbose = verbose # handle for pymbar.MBAR object @@ -73,30 +78,65 @@ def fit(self, u_nk): """ # sort by state so that rows from same state are in contiguous blocks u_nk = u_nk.sort_index(level=u_nk.index.names[1:]) - + groups = u_nk.groupby(level=u_nk.index.names[1:]) - N_k = [(len(groups.get_group(i)) if i in groups.groups else 0) for i in u_nk.columns] - - self._mbar = MBAR_(u_nk.T, N_k, - 
maximum_iterations=self.maximum_iterations, - relative_tolerance=self.relative_tolerance, - initial_f_k=self.initial_f_k, - solver_protocol=self.method, - verbose=self.verbose) + N_k = [(len(groups.get_group(i)) if i in groups.groups else 0) for i in + u_nk.columns] + + if self.method is None: + try: + self._mbar = MBAR_(u_nk.T, N_k, + maximum_iterations=self.maximum_iterations, + relative_tolerance=self.relative_tolerance, + initial_f_k=self.initial_f_k, + solver_protocol=[dict(method='hybr')], + verbose=self.verbose) + # set attributes + out = self._mbar.getFreeEnergyDifferences(return_theta=True) + except: + try: + self._mbar = MBAR_(u_nk.T, N_k, + maximum_iterations=self.maximum_iterations, + relative_tolerance=self.relative_tolerance, + initial_f_k=self.initial_f_k, + solver_protocol=[ + dict(method='adaptive')], + verbose=self.verbose) + # set attributes + out = self._mbar.getFreeEnergyDifferences( + return_theta=True) + except: + self._mbar = MBAR_(u_nk.T, N_k, + maximum_iterations=self.maximum_iterations, + relative_tolerance=self.relative_tolerance, + initial_f_k=self.initial_f_k, + solver_protocol=[dict(method='BFGS')], + verbose=self.verbose) + # set attributes + out = self._mbar.getFreeEnergyDifferences( + return_theta=True) + else: + self._mbar = MBAR_(u_nk.T, N_k, + maximum_iterations=self.maximum_iterations, + relative_tolerance=self.relative_tolerance, + initial_f_k=self.initial_f_k, + solver_protocol=self.method, + verbose=self.verbose) + # set attributes + out = self._mbar.getFreeEnergyDifferences(return_theta=True) self.states_ = u_nk.columns.values.tolist() - # set attributes - out = self._mbar.getFreeEnergyDifferences(return_theta=True) free_energy_differences = [pd.DataFrame(i, - columns=self.states_, - index=self.states_) for i in out] + columns=self.states_, + index=self.states_) for i in + out] (self.delta_f_, self.d_delta_f_, self.theta_) = free_energy_differences self.delta_f_.attrs = u_nk.attrs self.d_delta_f_.attrs = u_nk.attrs - + 
return self def predict(self, u_ln): From 4a72c2afc18e707adbeb656e013a7c2643baa705 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 2 Aug 2021 11:41:34 +0100 Subject: [PATCH 033/123] Revert "Update mbar_.py" This reverts commit 10fbdc46f8dea88a1b30103fb7d27028765507ad. --- src/alchemlyb/estimators/mbar_.py | 72 +++++++------------------------ 1 file changed, 16 insertions(+), 56 deletions(-) diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py index 8d4d8719..ad8b75cc 100644 --- a/src/alchemlyb/estimators/mbar_.py +++ b/src/alchemlyb/estimators/mbar_.py @@ -4,8 +4,6 @@ from sklearn.base import BaseEstimator from pymbar import MBAR as MBAR_ -import pymbar -from pymbar.utils import ParameterError class MBAR(BaseEstimator): @@ -50,15 +48,12 @@ class MBAR(BaseEstimator): """ def __init__(self, maximum_iterations=10000, relative_tolerance=1.0e-7, - initial_f_k=None, method=None, verbose=False): + initial_f_k=None, method='hybr', verbose=False): self.maximum_iterations = maximum_iterations self.relative_tolerance = relative_tolerance self.initial_f_k = initial_f_k - if method is None: - self.method = None - else: - self.method = [dict(method=method)] + self.method = [dict(method=method)] self.verbose = verbose # handle for pymbar.MBAR object @@ -78,65 +73,30 @@ def fit(self, u_nk): """ # sort by state so that rows from same state are in contiguous blocks u_nk = u_nk.sort_index(level=u_nk.index.names[1:]) - + groups = u_nk.groupby(level=u_nk.index.names[1:]) - N_k = [(len(groups.get_group(i)) if i in groups.groups else 0) for i in - u_nk.columns] - - if self.method is None: - try: - self._mbar = MBAR_(u_nk.T, N_k, - maximum_iterations=self.maximum_iterations, - relative_tolerance=self.relative_tolerance, - initial_f_k=self.initial_f_k, - solver_protocol=[dict(method='hybr')], - verbose=self.verbose) - # set attributes - out = self._mbar.getFreeEnergyDifferences(return_theta=True) - except: - try: - self._mbar = MBAR_(u_nk.T, N_k, - 
maximum_iterations=self.maximum_iterations, - relative_tolerance=self.relative_tolerance, - initial_f_k=self.initial_f_k, - solver_protocol=[ - dict(method='adaptive')], - verbose=self.verbose) - # set attributes - out = self._mbar.getFreeEnergyDifferences( - return_theta=True) - except: - self._mbar = MBAR_(u_nk.T, N_k, - maximum_iterations=self.maximum_iterations, - relative_tolerance=self.relative_tolerance, - initial_f_k=self.initial_f_k, - solver_protocol=[dict(method='BFGS')], - verbose=self.verbose) - # set attributes - out = self._mbar.getFreeEnergyDifferences( - return_theta=True) - else: - self._mbar = MBAR_(u_nk.T, N_k, - maximum_iterations=self.maximum_iterations, - relative_tolerance=self.relative_tolerance, - initial_f_k=self.initial_f_k, - solver_protocol=self.method, - verbose=self.verbose) - # set attributes - out = self._mbar.getFreeEnergyDifferences(return_theta=True) + N_k = [(len(groups.get_group(i)) if i in groups.groups else 0) for i in u_nk.columns] + + self._mbar = MBAR_(u_nk.T, N_k, + maximum_iterations=self.maximum_iterations, + relative_tolerance=self.relative_tolerance, + initial_f_k=self.initial_f_k, + solver_protocol=self.method, + verbose=self.verbose) self.states_ = u_nk.columns.values.tolist() + # set attributes + out = self._mbar.getFreeEnergyDifferences(return_theta=True) free_energy_differences = [pd.DataFrame(i, - columns=self.states_, - index=self.states_) for i in - out] + columns=self.states_, + index=self.states_) for i in out] (self.delta_f_, self.d_delta_f_, self.theta_) = free_energy_differences self.delta_f_.attrs = u_nk.attrs self.d_delta_f_.attrs = u_nk.attrs - + return self def predict(self, u_ln): From dd1735e6b3b8625df4280b6d1480e0a81645acf1 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 2 Aug 2021 11:42:25 +0100 Subject: [PATCH 034/123] Update mbar_.py --- src/alchemlyb/estimators/mbar_.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/alchemlyb/estimators/mbar_.py 
b/src/alchemlyb/estimators/mbar_.py index ad8b75cc..2454e1f4 100644 --- a/src/alchemlyb/estimators/mbar_.py +++ b/src/alchemlyb/estimators/mbar_.py @@ -48,8 +48,9 @@ class MBAR(BaseEstimator): """ def __init__(self, maximum_iterations=10000, relative_tolerance=1.0e-7, - initial_f_k=None, method='hybr', verbose=False): - + initial_f_k=None, method='adaptive', verbose=False): + # method='adaptive' is used as it is more stable + # https://github.com/choderalab/pymbar/issues/419 self.maximum_iterations = maximum_iterations self.relative_tolerance = relative_tolerance self.initial_f_k = initial_f_k From cd2ff92b6a72c1f545e8ee9b5aeff1be26e50d9f Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 25 Sep 2021 20:53:01 +0100 Subject: [PATCH 035/123] Create convergence.py --- src/alchemlyb/postprocessors/convergence.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/alchemlyb/postprocessors/convergence.py diff --git a/src/alchemlyb/postprocessors/convergence.py b/src/alchemlyb/postprocessors/convergence.py new file mode 100644 index 00000000..e69de29b From aa3054e8b4d64fadc447f95b0f3e19a3267b06fe Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 25 Sep 2021 20:53:21 +0100 Subject: [PATCH 036/123] Update convergence.py --- src/alchemlyb/postprocessors/convergence.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/alchemlyb/postprocessors/convergence.py b/src/alchemlyb/postprocessors/convergence.py index e69de29b..618d76eb 100644 --- a/src/alchemlyb/postprocessors/convergence.py +++ b/src/alchemlyb/postprocessors/convergence.py @@ -0,0 +1,2 @@ +def test(): + pass \ No newline at end of file From ef21b2d9d6a2343538e1faee00bfb9375a627b77 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 25 Sep 2021 21:14:59 +0100 Subject: [PATCH 037/123] Update convergence.py --- src/alchemlyb/postprocessors/convergence.py | 139 +++++++++++++++++++- 1 file changed, 137 insertions(+), 2 deletions(-) diff --git a/src/alchemlyb/postprocessors/convergence.py 
b/src/alchemlyb/postprocessors/convergence.py index 618d76eb..641685b9 100644 --- a/src/alchemlyb/postprocessors/convergence.py +++ b/src/alchemlyb/postprocessors/convergence.py @@ -1,2 +1,137 @@ -def test(): - pass \ No newline at end of file +import pandas as pd +def forward_backward_convergence(df_list, estimator='mbar', num=10): + ''' The forward and backward convergence of the free energy estimate. + + Generate the free energy change as a function of time in both + directions, with the specified number of points in the time. + + Parameters + ---------- + df_list : list + List of DataFrame of either dHdl or u_nk. + estimator : {'mbar', 'bar', 'ti'} + Name of the estimators. + num : int + The number of time points. + + Returns + ------- + DataFrame + The DataFrame with convergence data. :: + Forward F. Error Backward B. Error + 0 33.988935 0.334676 35.666128 0.324426 + 1 35.075489 0.232150 35.382850 0.230944 + 2 34.919988 0.190424 35.156028 0.189489 + 3 34.929927 0.165316 35.242255 0.164400 + 4 34.957007 0.147852 35.247704 0.147191 + 5 35.003660 0.134952 35.214658 0.134458 + 6 35.070199 0.124956 35.178422 0.124664 + 7 35.019853 0.116970 35.096870 0.116783 + 8 35.035123 0.110147 35.225907 0.109742 + 9 35.113417 0.104280 35.113417 0.104280 + + ''' + self.logger.info('Start convergence analysis.') + self.logger.info('Check data availability.') + + try: + dHdl_list = self.dHdl_sample_list + self.logger.info('Subsampled dHdl is available.') + except AttributeError: + try: + dHdl_list = self.dHdl_list + self.logger.info('Subsampled dHdl not available, ' + 'use original data instead.') + except AttributeError: # pragma: no cover + self.logger.warning('dHdl is not available.') + + try: + u_nk_list = self.u_nk_sample_list + self.logger.info('Subsampled u_nk is available.') + except AttributeError: + try: + u_nk_list = self.u_nk_list + self.logger.info('Subsampled u_nk not available, ' + 'use original data instead.') + except AttributeError: # pragma: no cover + 
self.logger.warning('u_nk is not available.') + + if estimator.lower() == 'mbar': + self.logger.info('Use MBAR estimator for convergence analysis.') + estimator_fit = MBAR().fit + elif estimator.lower() == 'bar': + self.logger.info('Use BAR estimator for convergence analysis.') + estimator_fit = BAR().fit + elif estimator.lower() == 'ti': + self.logger.info('Use TI estimator for convergence analysis.') + estimator_fit = TI().fit + else: # pragma: no cover + self.logger.warning( + '{} is not a valid estimator.'.format(estimator)) + + converter = get_unit_converter(self.units) + + self.logger.info('Begin forward analysis') + forward_list = [] + forward_error_list = [] + for i in range(1, forwrev + 1): + self.logger.info('Forward analysis: {:.2f}%'.format(i / forwrev)) + sample = [] + if estimator.lower() in ['mbar', 'bar']: + for data in u_nk_list: + sample.append(data[:len(data) // forwrev * i]) + elif estimator.lower() == 'ti': + for data in dHdl_list: + sample.append(data[:len(data) // forwrev * i]) + else: # pragma: no cover + raise NameError( + '{} is not a valid estimator.'.format(estimator)) + sample = concat(sample) + result = estimator_fit(sample) + forward_list.append(converter(result.delta_f_).iloc[0, -1]) + if estimator.lower() == 'bar': + error = np.sqrt(sum( + [converter(result.d_delta_f_).iloc[i, i + 1] ** 2 + for i in range(len(result.d_delta_f_) - 1)])) + forward_error_list.append(error) + else: + forward_error_list.append(converter(result.d_delta_f_).iloc[ + 0, -1]) + self.logger.info('{:.2f} +/- {:.2f} kT'.format(forward_list[-1], + forward_error_list[-1])) + + self.logger.info('Begin backward analysis') + backward_list = [] + backward_error_list = [] + for i in range(1, forwrev + 1): + self.logger.info('Backward analysis: {:.2f}%'.format(i / forwrev)) + sample = [] + if estimator.lower() in ['mbar', 'bar']: + for data in u_nk_list: + sample.append(data[-len(data) // forwrev * i:]) + elif estimator.lower() == 'ti': + for data in dHdl_list: + 
sample.append(data[-len(data) // forwrev * i:]) + else: # pragma: no cover + raise NameError( + '{} is not a valid estimator.'.format(estimator)) + sample = concat(sample) + result = estimator_fit(sample) + backward_list.append(converter(result.delta_f_).iloc[0, -1]) + if estimator.lower() == 'bar': + error = np.sqrt(sum( + [converter(result.d_delta_f_).iloc[i, i + 1] ** 2 + for i in range(len(result.d_delta_f_) - 1)])) + backward_error_list.append(error) + else: + backward_error_list.append(converter( + result.d_delta_f_).iloc[0, -1]) + self.logger.info('{:.2f} +/- {:.2f} kT'.format(backward_list[-1], + backward_error_list[ + -1])) + + convergence = pd.DataFrame( + {'Forward ({})'.format(self.units): forward_list, + 'F. Error ({})'.format(self.units): forward_error_list, + 'Backward ({})'.format(self.units): backward_list, + 'B. Error ({})'.format(self.units): backward_error_list}) \ No newline at end of file From 7238298fd978a294b3d1e64fe6cb069cd4e1cb85 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 26 Sep 2021 12:10:50 +0100 Subject: [PATCH 038/123] update --- src/alchemlyb/postprocessors/__init__.py | 3 + src/alchemlyb/postprocessors/convergence.py | 111 +++++++------------- src/alchemlyb/tests/test_convergence.py | 38 +++++++ 3 files changed, 81 insertions(+), 71 deletions(-) create mode 100644 src/alchemlyb/tests/test_convergence.py diff --git a/src/alchemlyb/postprocessors/__init__.py b/src/alchemlyb/postprocessors/__init__.py index 6e769ac4..563c2196 100644 --- a/src/alchemlyb/postprocessors/__init__.py +++ b/src/alchemlyb/postprocessors/__init__.py @@ -1,3 +1,6 @@ +from .convergence import forward_backward_convergence + __all__ = [ 'units', + 'forward_backward_convergence' ] diff --git a/src/alchemlyb/postprocessors/convergence.py b/src/alchemlyb/postprocessors/convergence.py index 641685b9..7ecc07ea 100644 --- a/src/alchemlyb/postprocessors/convergence.py +++ b/src/alchemlyb/postprocessors/convergence.py @@ -1,4 +1,10 @@ import pandas as pd +import 
logging +import numpy as np + +from ..estimators import MBAR, BAR, TI +from .. import concat + def forward_backward_convergence(df_list, estimator='mbar', num=10): ''' The forward and backward convergence of the free energy estimate. @@ -31,107 +37,70 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): 9 35.113417 0.104280 35.113417 0.104280 ''' - self.logger.info('Start convergence analysis.') - self.logger.info('Check data availability.') - - try: - dHdl_list = self.dHdl_sample_list - self.logger.info('Subsampled dHdl is available.') - except AttributeError: - try: - dHdl_list = self.dHdl_list - self.logger.info('Subsampled dHdl not available, ' - 'use original data instead.') - except AttributeError: # pragma: no cover - self.logger.warning('dHdl is not available.') - - try: - u_nk_list = self.u_nk_sample_list - self.logger.info('Subsampled u_nk is available.') - except AttributeError: - try: - u_nk_list = self.u_nk_list - self.logger.info('Subsampled u_nk not available, ' - 'use original data instead.') - except AttributeError: # pragma: no cover - self.logger.warning('u_nk is not available.') + logger = logging.getLogger('alchemlyb.postprocessors.' 
+ 'forward_backward_convergence') + logger.info('Start convergence analysis.') + logger.info('Check data availability.') if estimator.lower() == 'mbar': - self.logger.info('Use MBAR estimator for convergence analysis.') + logger.info('Use MBAR estimator for convergence analysis.') estimator_fit = MBAR().fit elif estimator.lower() == 'bar': - self.logger.info('Use BAR estimator for convergence analysis.') + logger.info('Use BAR estimator for convergence analysis.') estimator_fit = BAR().fit elif estimator.lower() == 'ti': - self.logger.info('Use TI estimator for convergence analysis.') + logger.info('Use TI estimator for convergence analysis.') estimator_fit = TI().fit else: # pragma: no cover - self.logger.warning( + logger.warning( '{} is not a valid estimator.'.format(estimator)) - converter = get_unit_converter(self.units) - - self.logger.info('Begin forward analysis') + logger.info('Begin forward analysis') forward_list = [] forward_error_list = [] - for i in range(1, forwrev + 1): - self.logger.info('Forward analysis: {:.2f}%'.format(i / forwrev)) + for i in range(1, num + 1): + logger.info('Forward analysis: {:.2f}%'.format(i / num)) sample = [] - if estimator.lower() in ['mbar', 'bar']: - for data in u_nk_list: - sample.append(data[:len(data) // forwrev * i]) - elif estimator.lower() == 'ti': - for data in dHdl_list: - sample.append(data[:len(data) // forwrev * i]) - else: # pragma: no cover - raise NameError( - '{} is not a valid estimator.'.format(estimator)) + for data in df_list: + sample.append(data[:len(data) // num * i]) sample = concat(sample) result = estimator_fit(sample) - forward_list.append(converter(result.delta_f_).iloc[0, -1]) + forward_list.append(result.delta_f_.iloc[0, -1]) if estimator.lower() == 'bar': error = np.sqrt(sum( - [converter(result.d_delta_f_).iloc[i, i + 1] ** 2 + [result.d_delta_f_.iloc[i, i + 1] ** 2 for i in range(len(result.d_delta_f_) - 1)])) forward_error_list.append(error) else: - 
forward_error_list.append(converter(result.d_delta_f_).iloc[ - 0, -1]) - self.logger.info('{:.2f} +/- {:.2f} kT'.format(forward_list[-1], - forward_error_list[-1])) + forward_error_list.append(result.d_delta_f_.iloc[0, -1]) + logger.info('{:.2f} +/- {:.2f} kT'.format(forward_list[-1], + forward_error_list[-1])) - self.logger.info('Begin backward analysis') + logger.info('Begin backward analysis') backward_list = [] backward_error_list = [] - for i in range(1, forwrev + 1): - self.logger.info('Backward analysis: {:.2f}%'.format(i / forwrev)) + for i in range(1, num + 1): + logger.info('Backward analysis: {:.2f}%'.format(i / num)) sample = [] - if estimator.lower() in ['mbar', 'bar']: - for data in u_nk_list: - sample.append(data[-len(data) // forwrev * i:]) - elif estimator.lower() == 'ti': - for data in dHdl_list: - sample.append(data[-len(data) // forwrev * i:]) - else: # pragma: no cover - raise NameError( - '{} is not a valid estimator.'.format(estimator)) + for data in df_list: + sample.append(data[-len(data) // num * i:]) sample = concat(sample) result = estimator_fit(sample) - backward_list.append(converter(result.delta_f_).iloc[0, -1]) + backward_list.append(result.delta_f_.iloc[0, -1]) if estimator.lower() == 'bar': error = np.sqrt(sum( - [converter(result.d_delta_f_).iloc[i, i + 1] ** 2 + [result.d_delta_f_.iloc[i, i + 1] ** 2 for i in range(len(result.d_delta_f_) - 1)])) backward_error_list.append(error) else: - backward_error_list.append(converter( - result.d_delta_f_).iloc[0, -1]) - self.logger.info('{:.2f} +/- {:.2f} kT'.format(backward_list[-1], - backward_error_list[ - -1])) + backward_error_list.append(result.d_delta_f_.iloc[0, -1]) + logger.info('{:.2f} +/- {:.2f} kT'.format(backward_list[-1], + backward_error_list[-1])) convergence = pd.DataFrame( - {'Forward ({})'.format(self.units): forward_list, - 'F. Error ({})'.format(self.units): forward_error_list, - 'Backward ({})'.format(self.units): backward_list, - 'B. 
@pytest.fixture()
def gmx_benzene():
    '''Parse the Gromacs benzene Coulomb leg once per test.

    Returns
    -------
    tuple of list
        ``(dHdl_frames, u_nk_frames)``: every file listed under
        ``['data']['Coulomb']`` of the benzene dataset, parsed at T=300 with
        ``gmx.extract_dHdl`` and ``gmx.extract_u_nk`` respectively.
    '''
    coulomb_files = load_benzene()['data']['Coulomb']
    dHdl_frames = [gmx.extract_dHdl(path, T=300) for path in coulomb_files]
    u_nk_frames = [gmx.extract_u_nk(path, T=300) for path in coulomb_files]
    return dHdl_frames, u_nk_frames


def _check_convergence(convergence, first_forward, first_backward,
                       last_forward, last_backward):
    '''Assert the shape and the first/last estimates of a convergence table.

    Column 0 and column 2 are compared (the forward and backward estimates in
    the table returned by ``forward_backward_convergence``), each with a
    relative tolerance of 0.01.
    '''
    assert convergence.shape == (10, 4)
    assert convergence.iloc[0, 0] == pytest.approx(first_forward, 0.01)
    assert convergence.iloc[0, 2] == pytest.approx(first_backward, 0.01)
    assert convergence.iloc[-1, 0] == pytest.approx(last_forward, 0.01)
    assert convergence.iloc[-1, 2] == pytest.approx(last_backward, 0.01)


def test_convergence_ti(gmx_benzene):
    '''TI convergence computed from the dHdl frames.'''
    dHdl_frames, _ = gmx_benzene
    table = forward_backward_convergence(dHdl_frames, 'TI')
    _check_convergence(table, 3.07, 3.11, 3.09, 3.09)


def test_convergence_mbar(gmx_benzene):
    '''MBAR convergence computed from the u_nk frames.'''
    _, u_nk_frames = gmx_benzene
    table = forward_backward_convergence(u_nk_frames, 'MBAR')
    _check_convergence(table, 3.02, 3.06, 3.05, 3.04)


def test_convergence_bar(gmx_benzene):
    '''BAR convergence computed from the u_nk frames.'''
    _, u_nk_frames = gmx_benzene
    table = forward_backward_convergence(u_nk_frames, 'BAR')
    _check_convergence(table, 3.02, 3.06, 3.05, 3.04)
autofunction:: alchemlyb.postprocessors.forward_backward_convergence Unit Conversion --------------- diff --git a/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst b/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst index ea532ea2..c58a7d90 100644 --- a/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst +++ b/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst @@ -5,9 +5,13 @@ Plot the Forward and Backward Convergence The function :func:`~alchemlyb.visualisation.plot_convergence` allows the user to visualise the convergence by plotting the free energy change -computed using the equilibrated snapshots between the proper target time frames -in both forward (data points are stored in `forward` and `forward_error`) and -reverse (data points are stored in `backward` and `backward_error`) directions. +computed using the equilibrated snapshots between the proper target time +frames. The data could be provided as a Dataframe as the output from +:func:`alchemlyb.postprocessors.forward_backward_convergence` or provided +explicitly in both forward (data points are stored in `forward` and +`forward_error`) and reverse (data points are stored in `backward` and +`backward_error`) directions. + The unit in the y axis could be labelled to other units by setting *units*, which by default is :math:`kT`. The user can pass :class:`matplotlib.axes.Axes` into the function to have the convergence drawn on a specific axes. diff --git a/src/alchemlyb/postprocessors/convergence.py b/src/alchemlyb/postprocessors/convergence.py index 7ecc07ea..d0c9523d 100644 --- a/src/alchemlyb/postprocessors/convergence.py +++ b/src/alchemlyb/postprocessors/convergence.py @@ -103,4 +103,4 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): 'Backward': backward_list, 'B. 
Error': backward_error_list}) convergence.attrs = df_list[0].attrs - return convergence \ No newline at end of file + return convergence diff --git a/src/alchemlyb/tests/test_visualisation.py b/src/alchemlyb/tests/test_visualisation.py index f45dac77..32d8db12 100644 --- a/src/alchemlyb/tests/test_visualisation.py +++ b/src/alchemlyb/tests/test_visualisation.py @@ -12,6 +12,7 @@ from alchemlyb.visualisation.ti_dhdl import plot_ti_dhdl from alchemlyb.visualisation.dF_state import plot_dF_state from alchemlyb.visualisation import plot_convergence +from alchemlyb.postprocessors import forward_backward_convergence def test_plot_mbar_omatrix(): '''Just test if the plot runs''' @@ -126,6 +127,14 @@ def test_plot_dF_state(): assert isinstance(fig, matplotlib.figure.Figure) plt.close(fig) +def test_plot_convergence_dataframe(): + bz = load_benzene().data + data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] + df = forward_backward_convergence(data_list, 'mbar') + ax = plot_convergence(dataframe=df) + assert isinstance(ax, matplotlib.axes.Axes) + plt.close(ax.figure) + def test_plot_convergence(): bz = load_benzene().data data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index e93bfe8a..823787b9 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -2,7 +2,10 @@ from matplotlib.font_manager import FontProperties as FP import numpy as np -def plot_convergence(forward, forward_error, backward, backward_error, +from ..postprocessors.units import get_unit_converter + +def plot_convergence(forward=None, forward_error=None, backward=None, + backward_error=None, dataframe=None, units='kT', ax=None): """Plot the forward and backward convergence. @@ -16,6 +19,11 @@ def plot_convergence(forward, forward_error, backward, backward_error, A list of free energy estimate from the last X% of data. 
backward_error : List A list of error from the last X% of data. + dataframe : Dataframe + Output Dataframe from + :func:`~alchemlyb.postprocessors.forward_backward_convergence`. If + Dataframe is provided, `forward`, `forward_error`, `backward`, + `backward_error` will be ignored. units : str The label for the unit of the estimate. Default: "kT" ax : matplotlib.axes.Axes @@ -32,12 +40,19 @@ def plot_convergence(forward, forward_error, backward, backward_error, The code is taken and modified from `Alchemical Analysis `_. - The units variable is for labelling only. Changing it doesn't change the - unit of the underlying variable. + If `dataframe` is not provide, the units variable is for labelling only. + Changing it doesn't change the unit of the underlying variable. .. versionadded:: 0.4.0 """ + if dataframe is not None: + dataframe = get_unit_converter(units)(dataframe) + forward = dataframe['Forward'].to_numpy() + forward_error = dataframe['F. Error'].to_numpy() + backward = dataframe['Backward'].to_numpy() + backward_error = dataframe['B. Error'].to_numpy() + if ax is None: # pragma: no cover fig, ax = plt.subplots(figsize=(8, 6)) From e87867fa1facc181aaed5833b953f106a576c95a Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 26 Sep 2021 13:43:20 +0100 Subject: [PATCH 040/123] update doc --- docs/postprocessing.rst | 2 -- src/alchemlyb/postprocessors/convergence.py | 5 ++++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/postprocessing.rst b/docs/postprocessing.rst index 87f28b61..55915280 100644 --- a/docs/postprocessing.rst +++ b/docs/postprocessing.rst @@ -12,10 +12,8 @@ forward and backward convergence of the estimate using :func:`~alchemlyb.postprocessors.forward_backward_convergence` and :func:`~alchemlyb.visualisation.plot_convergence`. 
:: - >>> import pandas as pd >>> from alchemtest.gmx import load_benzene >>> from alchemlyb.parsing.gmx import extract_u_nk - >>> from alchemlyb.estimators import MBAR >>> from alchemlyb.visualisation import plot_convergence >>> from alchemlyb.postprocessors import forward_backward_convergence diff --git a/src/alchemlyb/postprocessors/convergence.py b/src/alchemlyb/postprocessors/convergence.py index d0c9523d..01c891b8 100644 --- a/src/alchemlyb/postprocessors/convergence.py +++ b/src/alchemlyb/postprocessors/convergence.py @@ -24,7 +24,8 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): ------- DataFrame The DataFrame with convergence data. :: - Forward F. Error Backward B. Error + + Forward F. Error Backward B. Error 0 33.988935 0.334676 35.666128 0.324426 1 35.075489 0.232150 35.382850 0.230944 2 34.919988 0.190424 35.156028 0.189489 @@ -36,6 +37,8 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): 8 35.035123 0.110147 35.225907 0.109742 9 35.113417 0.104280 35.113417 0.104280 + + ''' logger = logging.getLogger('alchemlyb.postprocessors.' 
'forward_backward_convergence') From 7d2b936c0e221c45130d6b5e6b511f19a9c2e0e8 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 26 Sep 2021 13:51:43 +0100 Subject: [PATCH 041/123] update doc --- docs/visualisation/alchemlyb.visualisation.plot_convergence.rst | 2 +- src/alchemlyb/postprocessors/convergence.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst b/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst index c58a7d90..74a85e89 100644 --- a/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst +++ b/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst @@ -6,7 +6,7 @@ Plot the Forward and Backward Convergence The function :func:`~alchemlyb.visualisation.plot_convergence` allows the user to visualise the convergence by plotting the free energy change computed using the equilibrated snapshots between the proper target time -frames. The data could be provided as a Dataframe as the output from +frames. The data could be provided as a Dataframe from :func:`alchemlyb.postprocessors.forward_backward_convergence` or provided explicitly in both forward (data points are stored in `forward` and `forward_error`) and reverse (data points are stored in `backward` and diff --git a/src/alchemlyb/postprocessors/convergence.py b/src/alchemlyb/postprocessors/convergence.py index 01c891b8..efc3b32f 100644 --- a/src/alchemlyb/postprocessors/convergence.py +++ b/src/alchemlyb/postprocessors/convergence.py @@ -37,8 +37,6 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): 8 35.035123 0.110147 35.225907 0.109742 9 35.113417 0.104280 35.113417 0.104280 - - ''' logger = logging.getLogger('alchemlyb.postprocessors.' 
'forward_backward_convergence') From 36f9adc73aea833e6432ef689d2be7ed83387628 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 4 Oct 2021 21:25:07 +0100 Subject: [PATCH 042/123] update --- docs/api_principles.rst | 6 +- docs/convergence.rst | 34 ++++++ docs/index.rst | 1 + docs/postprocessing.rst | 26 ----- src/alchemlyb/convergence/__init__.py | 1 + src/alchemlyb/convergence/convergence.py | 113 ++++++++++++++++++++ src/alchemlyb/postprocessors/__init__.py | 3 - src/alchemlyb/postprocessors/convergence.py | 107 ------------------ src/alchemlyb/tests/test_convergence.py | 2 +- src/alchemlyb/tests/test_visualisation.py | 4 +- src/alchemlyb/visualisation/convergence.py | 34 +++--- 11 files changed, 172 insertions(+), 159 deletions(-) create mode 100644 docs/convergence.rst delete mode 100644 src/alchemlyb/postprocessors/convergence.py diff --git a/docs/api_principles.rst b/docs/api_principles.rst index d629b50c..492ed944 100644 --- a/docs/api_principles.rst +++ b/docs/api_principles.rst @@ -57,7 +57,8 @@ The library is structured as follows, following a similar style to │   └── ... ├── postprocessors │   ├── ... - │   └── units.py + │   └── convergence.py + │   └── units.py ├── visualisation │   ├── convergence.py │   ├── dF_state.py @@ -83,8 +84,7 @@ The :mod:`~alchemlyb.estimators` module features classes *a la* **scikit-learn** MBAR, BAR, and thermodynamic integration (TI) as the major methods are all implemented. Correct error estimates require the use of time series with independent samples. -The :mod:`~alchemlyb.convergence` submodule will feature convenience functions/classes for doing convergence analysis using a given dataset and a chosen estimator, though the form of this is not yet thought-out. -However, the `gist a41e5756a58e1775e3e3a915f07bfd37`_ shows an example for how this can be done already in practice. 
+The :mod:`~alchemlyb.convergence` submodule will feature convenience functions/classes for doing convergence analysis using a given dataset and a chosen estimator. The :mod:`~alchemlyb.postprocessing` submodule contains functions to calculate new quantities or express data in different units. diff --git a/docs/convergence.rst b/docs/convergence.rst new file mode 100644 index 00000000..be20a657 --- /dev/null +++ b/docs/convergence.rst @@ -0,0 +1,34 @@ +Using functions to estimate Convergence +======================================= + +For a result to be valid, we need to ensure that longer simulation time +would not result in different results. Various functions are provided in +this module to estimate the convergence of the data and help user determine +the simulation end point. + +Time Convergence +---------------- +One way of determining the simulation end point is to compute and plot the +forward and backward convergence of the estimate using +:func:`~alchemlyb.convergence.forward_backward_convergence` and +:func:`~alchemlyb.visualisation.plot_convergence`. :: + + >>> from alchemtest.gmx import load_benzene + >>> from alchemlyb.parsing.gmx import extract_u_nk + >>> from alchemlyb.visualisation import plot_convergence + >>> from alchemlyb.convergence import forward_backward_convergence + + >>> bz = load_benzene().data + >>> data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] + >>> df = forward_backward_convergence(data_list, 'mbar') + >>> ax = plot_convergence(dataframe=df) + >>> ax.figure.savefig('dF_t.pdf') + +Will give a plot looks like this + +.. figure:: images/dF_t.png + + A convergence plot of showing that the forward and backward has converged + fully. + +.. autofunction:: alchemlyb.convergence.forward_backward_convergence \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index dfcabf6a..f9b36240 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -70,6 +70,7 @@ Contributions are very welcome. 
If you have bug reports or feature requests or q parsing preprocessing estimators + convergence postprocessing visualisation diff --git a/docs/postprocessing.rst b/docs/postprocessing.rst index 55915280..d7451108 100644 --- a/docs/postprocessing.rst +++ b/docs/postprocessing.rst @@ -5,32 +5,6 @@ Tools for postprocessing Tools are available for postprocessing the dataframes. -Time Convergence ----------------- -One way of determining the simulation end point is to compute and plot the -forward and backward convergence of the estimate using -:func:`~alchemlyb.postprocessors.forward_backward_convergence` and -:func:`~alchemlyb.visualisation.plot_convergence`. :: - - >>> from alchemtest.gmx import load_benzene - >>> from alchemlyb.parsing.gmx import extract_u_nk - >>> from alchemlyb.visualisation import plot_convergence - >>> from alchemlyb.postprocessors import forward_backward_convergence - - >>> bz = load_benzene().data - >>> data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] - >>> df = forward_backward_convergence(data_list, 'mbar') - >>> ax = plot_convergence(dataframe=df) - >>> ax.figure.savefig('dF_t.pdf') - -Will give a plot looks like this - -.. figure:: images/dF_t.png - - A convergence plot of showing that the forward and backward has converged - fully. - -.. 
def _convergence_series(df_list, estimator_fit, is_bar, num, label, window,
                        logger):
    '''Fit the estimator on ``num`` successively larger windows of the data.

    Parameters
    ----------
    df_list : list
        List of DataFrame, one per simulation window.
    estimator_fit : callable
        Bound ``fit`` method of the estimator; called once per time point.
    is_bar : bool
        If True, the error is the root-sum-square of the adjacent-state
        entries ``d_delta_f_.iloc[k, k + 1]``; otherwise the end-to-end entry
        ``d_delta_f_.iloc[0, -1]`` is used directly.
    num : int
        The number of time points.
    label : str
        ``'forward'`` or ``'backward'``; only used in log messages.
    window : callable
        ``window(data, i)`` returns the slice of ``data`` for time point ``i``.
    logger : logging.Logger
        Logger that receives the progress messages.

    Returns
    -------
    tuple of list
        ``(estimates, errors)``, each of length ``num``.
    '''
    logger.info('Begin {} analysis'.format(label))
    estimates = []
    errors = []
    for i in range(1, num + 1):
        # Scale by 100 so the value printed in front of '%' is a percentage.
        logger.info('{} analysis: {:.2f}%'.format(label.capitalize(),
                                                  100 * i / num))
        sample = concat([window(data, i) for data in df_list])
        result = estimator_fit(sample)
        estimates.append(result.delta_f_.iloc[0, -1])
        if is_bar:
            # Presumably BAR only fills in uncertainties between neighbouring
            # states, hence the accumulation along the first off-diagonal.
            d_delta_f = result.d_delta_f_
            error = np.sqrt(sum(
                d_delta_f.iloc[k, k + 1] ** 2
                for k in range(len(d_delta_f) - 1)))
        else:
            error = result.d_delta_f_.iloc[0, -1]
        errors.append(error)
        logger.info('{:.2f} +/- {:.2f} kT'.format(estimates[-1], errors[-1]))
    return estimates, errors


def forward_backward_convergence(df_list, estimator='mbar', num=10):
    ''' The forward and backward convergence of the free energy estimate.

    Generate the free energy change as a function of time in both
    directions, with the specified number of points in the time.

    Parameters
    ----------
    df_list : list
        List of DataFrame of either dHdl or u_nk.
    estimator : {'mbar', 'bar', 'ti'}
        Name of the estimators (case-insensitive).
    num : int
        The number of time points.

    Returns
    -------
    DataFrame
        The DataFrame with convergence data. ::

                         Forward  Forward_Error  Backward  Backward_Error
            t_fraction
            1/10        3.067943       0.070175  3.111035        0.067088
            2/10        3.122223       0.049303  3.126450        0.048173
            3/10        3.117742       0.039916  3.094115        0.039099
            4/10        3.091870       0.034389  3.101558        0.033783
            5/10        3.093778       0.030814  3.082714        0.030148
            6/10        3.079128       0.027999  3.085972        0.027652
            7/10        3.086951       0.025847  3.077004        0.025610
            8/10        3.079147       0.024122  3.081519        0.023968
            9/10        3.086575       0.022778  3.090475        0.022633
            10/10       3.088821       0.021573  3.089027        0.021568

        The ``attrs`` of the first DataFrame in `df_list` are carried over to
        the returned DataFrame.

    Raises
    ------
    ValueError
        If `estimator` is not one of 'mbar', 'bar' or 'ti', or if `df_list`
        is empty.


    .. versionadded:: 0.6.0
    '''
    logger = logging.getLogger('alchemlyb.convergence.'
                               'forward_backward_convergence')
    logger.info('Start convergence analysis.')
    logger.info('Check data availability.')

    # Fail early and explicitly: previously an unknown estimator was only
    # logged and the function crashed later on an unbound local variable.
    name = estimator.lower()
    if name not in ('mbar', 'bar', 'ti'):
        raise ValueError('{} is not a valid estimator.'.format(estimator))
    if len(df_list) == 0:
        raise ValueError('df_list is empty; no data to analyse.')

    logger.info('Use {} estimator for convergence analysis.'.format(
        name.upper()))
    # A single estimator instance is reused for every fit.
    estimator_fit = {'mbar': MBAR, 'bar': BAR, 'ti': TI}[name]().fit
    is_bar = name == 'bar'

    forward_list, forward_error_list = _convergence_series(
        df_list, estimator_fit, is_bar, num, 'forward',
        lambda data, i: data[:len(data) // num * i], logger)

    # NOTE(review): unary minus binds tighter than //, so the start index is
    # (-len(data)) // num * i, i.e. the backward window holds
    # ceil(len / num) * i rows while the forward window holds
    # floor(len / num) * i rows. Kept unchanged so existing results do not
    # shift; confirm whether the asymmetry is intended.
    backward_list, backward_error_list = _convergence_series(
        df_list, estimator_fit, is_bar, num, 'backward',
        lambda data, i: data[-len(data) // num * i:], logger)

    convergence = pd.DataFrame(
        {'Forward': forward_list,
         'Forward_Error': forward_error_list,
         'Backward': backward_list,
         'Backward_Error': backward_error_list},
        index=['{}/{}'.format(i, num) for i in range(1, num + 1)])
    convergence.index.name = 't_fraction'
    convergence.attrs = df_list[0].attrs
    return convergence
Error - 0 33.988935 0.334676 35.666128 0.324426 - 1 35.075489 0.232150 35.382850 0.230944 - 2 34.919988 0.190424 35.156028 0.189489 - 3 34.929927 0.165316 35.242255 0.164400 - 4 34.957007 0.147852 35.247704 0.147191 - 5 35.003660 0.134952 35.214658 0.134458 - 6 35.070199 0.124956 35.178422 0.124664 - 7 35.019853 0.116970 35.096870 0.116783 - 8 35.035123 0.110147 35.225907 0.109742 - 9 35.113417 0.104280 35.113417 0.104280 - - ''' - logger = logging.getLogger('alchemlyb.postprocessors.' - 'forward_backward_convergence') - logger.info('Start convergence analysis.') - logger.info('Check data availability.') - - if estimator.lower() == 'mbar': - logger.info('Use MBAR estimator for convergence analysis.') - estimator_fit = MBAR().fit - elif estimator.lower() == 'bar': - logger.info('Use BAR estimator for convergence analysis.') - estimator_fit = BAR().fit - elif estimator.lower() == 'ti': - logger.info('Use TI estimator for convergence analysis.') - estimator_fit = TI().fit - else: # pragma: no cover - logger.warning( - '{} is not a valid estimator.'.format(estimator)) - - logger.info('Begin forward analysis') - forward_list = [] - forward_error_list = [] - for i in range(1, num + 1): - logger.info('Forward analysis: {:.2f}%'.format(i / num)) - sample = [] - for data in df_list: - sample.append(data[:len(data) // num * i]) - sample = concat(sample) - result = estimator_fit(sample) - forward_list.append(result.delta_f_.iloc[0, -1]) - if estimator.lower() == 'bar': - error = np.sqrt(sum( - [result.d_delta_f_.iloc[i, i + 1] ** 2 - for i in range(len(result.d_delta_f_) - 1)])) - forward_error_list.append(error) - else: - forward_error_list.append(result.d_delta_f_.iloc[0, -1]) - logger.info('{:.2f} +/- {:.2f} kT'.format(forward_list[-1], - forward_error_list[-1])) - - logger.info('Begin backward analysis') - backward_list = [] - backward_error_list = [] - for i in range(1, num + 1): - logger.info('Backward analysis: {:.2f}%'.format(i / num)) - sample = [] - for data in 
df_list: - sample.append(data[-len(data) // num * i:]) - sample = concat(sample) - result = estimator_fit(sample) - backward_list.append(result.delta_f_.iloc[0, -1]) - if estimator.lower() == 'bar': - error = np.sqrt(sum( - [result.d_delta_f_.iloc[i, i + 1] ** 2 - for i in range(len(result.d_delta_f_) - 1)])) - backward_error_list.append(error) - else: - backward_error_list.append(result.d_delta_f_.iloc[0, -1]) - logger.info('{:.2f} +/- {:.2f} kT'.format(backward_list[-1], - backward_error_list[-1])) - - convergence = pd.DataFrame( - {'Forward': forward_list, - 'F. Error': forward_error_list, - 'Backward': backward_list, - 'B. Error': backward_error_list}) - convergence.attrs = df_list[0].attrs - return convergence diff --git a/src/alchemlyb/tests/test_convergence.py b/src/alchemlyb/tests/test_convergence.py index 2f5df840..5fae5841 100644 --- a/src/alchemlyb/tests/test_convergence.py +++ b/src/alchemlyb/tests/test_convergence.py @@ -2,7 +2,7 @@ from alchemtest.gmx import load_benzene from alchemlyb.parsing import gmx -from alchemlyb.postprocessors import forward_backward_convergence +from alchemlyb.convergence import forward_backward_convergence @pytest.fixture() def gmx_benzene(): diff --git a/src/alchemlyb/tests/test_visualisation.py b/src/alchemlyb/tests/test_visualisation.py index 32d8db12..bcaf4a32 100644 --- a/src/alchemlyb/tests/test_visualisation.py +++ b/src/alchemlyb/tests/test_visualisation.py @@ -12,7 +12,7 @@ from alchemlyb.visualisation.ti_dhdl import plot_ti_dhdl from alchemlyb.visualisation.dF_state import plot_dF_state from alchemlyb.visualisation import plot_convergence -from alchemlyb.postprocessors import forward_backward_convergence +from alchemlyb.convergence import forward_backward_convergence def test_plot_mbar_omatrix(): '''Just test if the plot runs''' @@ -131,7 +131,7 @@ def test_plot_convergence_dataframe(): bz = load_benzene().data data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] df = 
forward_backward_convergence(data_list, 'mbar') - ax = plot_convergence(dataframe=df) + ax = plot_convergence(df) assert isinstance(ax, matplotlib.axes.Axes) plt.close(ax.figure) diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index 823787b9..d3cccf43 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -1,29 +1,20 @@ import matplotlib.pyplot as plt +import pandas as pd from matplotlib.font_manager import FontProperties as FP import numpy as np from ..postprocessors.units import get_unit_converter -def plot_convergence(forward=None, forward_error=None, backward=None, - backward_error=None, dataframe=None, - units='kT', ax=None): +def plot_convergence(*data, units='kT', ax=None): """Plot the forward and backward convergence. Parameters ---------- - forward : List - A list of free energy estimate from the first X% of data. - forward_error : List - A list of error from the first X% of data. - backward : List - A list of free energy estimate from the last X% of data. - backward_error : List - A list of error from the last X% of data. - dataframe : Dataframe + data : Dataframe or 4 Lists Output Dataframe from - :func:`~alchemlyb.postprocessors.forward_backward_convergence`. If - Dataframe is provided, `forward`, `forward_error`, `backward`, - `backward_error` will be ignored. + :func:`~alchemlyb.postprocessors.convergence.forward_backward_convergence`. + Or given explicitly as `forward`, `forward_error`, `backward`, + `backward_error` see :ref:`plot_convergence `. units : str The label for the unit of the estimate. Default: "kT" ax : matplotlib.axes.Axes @@ -44,14 +35,23 @@ def plot_convergence(forward=None, forward_error=None, backward=None, Changing it doesn't change the unit of the underlying variable. + .. versionchanged:: 0.6.0 + data now takes in dataframe + .. 
versionadded:: 0.4.0 """ - if dataframe is not None: - dataframe = get_unit_converter(units)(dataframe) + if len(data) == 1 and isinstance(data[0], pd.DataFrame): + dataframe = get_unit_converter(units)(data) forward = dataframe['Forward'].to_numpy() forward_error = dataframe['F. Error'].to_numpy() backward = dataframe['Backward'].to_numpy() backward_error = dataframe['B. Error'].to_numpy() + else: + try: + forward, forward_error, backward, backward_error = data + except ValueError: + raise ValueError('Ensure all four of forward, forward_error, ' + 'backward, backward_error are supplied.') if ax is None: # pragma: no cover fig, ax = plt.subplots(figsize=(8, 6)) From f9d08bc8bd865adf0ea3fb4de53da7a595e547b3 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 4 Oct 2021 21:32:15 +0100 Subject: [PATCH 043/123] update --- docs/api_principles.rst | 1 - docs/convergence.rst | 6 +++--- .../alchemlyb.visualisation.plot_convergence.rst | 2 +- src/alchemlyb/convergence/__init__.py | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/api_principles.rst b/docs/api_principles.rst index 492ed944..8d912d08 100644 --- a/docs/api_principles.rst +++ b/docs/api_principles.rst @@ -57,7 +57,6 @@ The library is structured as follows, following a similar style to │   └── ... ├── postprocessors │   ├── ... - │   └── convergence.py │   └── units.py ├── visualisation │   ├── convergence.py diff --git a/docs/convergence.rst b/docs/convergence.rst index be20a657..14a1b480 100644 --- a/docs/convergence.rst +++ b/docs/convergence.rst @@ -2,8 +2,8 @@ Using functions to estimate Convergence ======================================= For a result to be valid, we need to ensure that longer simulation time -would not result in different results. Various functions are provided in -this module to estimate the convergence of the data and help user determine +would not result in different results. 
Various functions will be provided in +this module to estimate the convergence of the estimate and help user determine the simulation end point. Time Convergence @@ -21,7 +21,7 @@ forward and backward convergence of the estimate using >>> bz = load_benzene().data >>> data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] >>> df = forward_backward_convergence(data_list, 'mbar') - >>> ax = plot_convergence(dataframe=df) + >>> ax = plot_convergence(df) >>> ax.figure.savefig('dF_t.pdf') Will give a plot looks like this diff --git a/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst b/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst index 74a85e89..3ca9abbf 100644 --- a/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst +++ b/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst @@ -7,7 +7,7 @@ The function :func:`~alchemlyb.visualisation.plot_convergence` allows the user to visualise the convergence by plotting the free energy change computed using the equilibrated snapshots between the proper target time frames. The data could be provided as a Dataframe from -:func:`alchemlyb.postprocessors.forward_backward_convergence` or provided +:func:`alchemlyb.convergence.forward_backward_convergence` or provided explicitly in both forward (data points are stored in `forward` and `forward_error`) and reverse (data points are stored in `backward` and `backward_error`) directions. 
diff --git a/src/alchemlyb/convergence/__init__.py b/src/alchemlyb/convergence/__init__.py index 6469cee2..e8dd32b3 100644 --- a/src/alchemlyb/convergence/__init__.py +++ b/src/alchemlyb/convergence/__init__.py @@ -1 +1 @@ -from convergence import forward_backward_convergence +from .convergence import forward_backward_convergence From 305ceac8a84cd4285fc23a18a9fa46354ed7fe60 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Thu, 7 Oct 2021 20:09:20 +0100 Subject: [PATCH 044/123] update --- CHANGES | 4 +++ src/alchemlyb/convergence/convergence.py | 36 ++++++++++------------ src/alchemlyb/tests/test_convergence.py | 8 ++++- src/alchemlyb/visualisation/convergence.py | 13 +++++--- 4 files changed, 36 insertions(+), 25 deletions(-) diff --git a/CHANGES b/CHANGES index 91e5ae66..d7ccf870 100644 --- a/CHANGES +++ b/CHANGES @@ -20,6 +20,10 @@ The rules for this file: Changes Enhancements + - Time convergence function forward_backward_convergence + introduced and plot_convergence now takes dataframe from + forward_backward_convergence as input (PR #168). + Fixes - Subsampling now works with bounds and step (PR #167, issue #166). diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index 8a4124c4..3e3ec682 100644 --- a/src/alchemlyb/convergence/convergence.py +++ b/src/alchemlyb/convergence/convergence.py @@ -26,18 +26,17 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): DataFrame The DataFrame with convergence data. 
:: - Forward Forward_Error Backward Backward_Error - t_fraction - 1/10 3.067943 0.070175 3.111035 0.067088 - 2/10 3.122223 0.049303 3.126450 0.048173 - 3/10 3.117742 0.039916 3.094115 0.039099 - 4/10 3.091870 0.034389 3.101558 0.033783 - 5/10 3.093778 0.030814 3.082714 0.030148 - 6/10 3.079128 0.027999 3.085972 0.027652 - 7/10 3.086951 0.025847 3.077004 0.025610 - 8/10 3.079147 0.024122 3.081519 0.023968 - 9/10 3.086575 0.022778 3.090475 0.022633 - 10/10 3.088821 0.021573 3.089027 0.021568 + Forward Forward_Error Backward Backward_Error data_fraction + 0 3.016442 0.052748 3.065176 0.051036 0.1 + 1 3.078106 0.037170 3.078567 0.036640 0.2 + 2 3.072561 0.030186 3.047357 0.029775 0.3 + 3 3.048325 0.026070 3.057527 0.025743 0.4 + 4 3.049769 0.023359 3.037454 0.023001 0.5 + 5 3.034078 0.021260 3.040484 0.021075 0.6 + 6 3.043274 0.019642 3.032495 0.019517 0.7 + 7 3.035460 0.018340 3.036670 0.018261 0.8 + 8 3.042032 0.017319 3.046597 0.017233 0.9 + 9 3.044149 0.016405 3.044385 0.016402 1.0 .. versionadded:: 0.6.0 @@ -56,15 +55,15 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): elif estimator.lower() == 'ti': logger.info('Use TI estimator for convergence analysis.') estimator_fit = TI().fit - else: # pragma: no cover - logger.warning( + else: + raise ValueError( '{} is not a valid estimator.'.format(estimator)) logger.info('Begin forward analysis') forward_list = [] forward_error_list = [] for i in range(1, num + 1): - logger.info('Forward analysis: {:.2f}%'.format(i / num)) + logger.info('Forward analysis: {:.2f}%'.format(100 * i / num)) sample = [] for data in df_list: sample.append(data[:len(data) // num * i]) @@ -85,7 +84,7 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): backward_list = [] backward_error_list = [] for i in range(1, num + 1): - logger.info('Backward analysis: {:.2f}%'.format(i / num)) + logger.info('Backward analysis: {:.2f}%'.format(100 * i / num)) sample = [] for data in df_list: 
sample.append(data[-len(data) // num * i:]) @@ -106,8 +105,7 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): {'Forward': forward_list, 'Forward_Error': forward_error_list, 'Backward': backward_list, - 'Backward_Error': backward_error_list}, - index=['{}/{}'.format(i, num) for i in range(1, num + 1)]) - convergence.index.name = 't_fraction' + 'Backward_Error': backward_error_list, + 'data_fraction': [i / num for i in range(1, num + 1)]}) convergence.attrs = df_list[0].attrs return convergence diff --git a/src/alchemlyb/tests/test_convergence.py b/src/alchemlyb/tests/test_convergence.py index 5fae5841..ab533a92 100644 --- a/src/alchemlyb/tests/test_convergence.py +++ b/src/alchemlyb/tests/test_convergence.py @@ -31,8 +31,14 @@ def test_convergence_mbar(gmx_benzene): def test_convergence_bar(gmx_benzene): dHdl, u_nk = gmx_benzene convergence = forward_backward_convergence(u_nk, 'BAR') - assert convergence.shape == (10, 4) + assert convergence.shape == (10, 5) assert convergence.iloc[0, 0] == pytest.approx(3.02, 0.01) assert convergence.iloc[0, 2] == pytest.approx(3.06, 0.01) assert convergence.iloc[-1, 0] == pytest.approx(3.05, 0.01) assert convergence.iloc[-1, 2] == pytest.approx(3.04, 0.01) + +def test_convergence_wrong_estimator(gmx_benzene): + dHdl, u_nk = gmx_benzene + with pytest.raises(ValueError): + convergence = forward_backward_convergence(u_nk, 'www') + \ No newline at end of file diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index d3cccf43..a30585b5 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -10,9 +10,9 @@ def plot_convergence(*data, units='kT', ax=None): Parameters ---------- - data : Dataframe or 4 Lists + data : Dataframe or 4 array_like objects Output Dataframe from - :func:`~alchemlyb.postprocessors.convergence.forward_backward_convergence`. + :func:`~alchemlyb.convergence.forward_backward_convergence`. 
Or given explicitly as `forward`, `forward_error`, `backward`, `backward_error` see :ref:`plot_convergence `. units : str @@ -31,7 +31,10 @@ def plot_convergence(*data, units='kT', ax=None): The code is taken and modified from `Alchemical Analysis `_. - If `dataframe` is not provide, the units variable is for labelling only. + If `data` is not an :class:pandas.Dataframe` produced by + :func:`~alchemlyb.convergence.forward_backward_convergence`, + the unit will be adjusted accoridng to the units + variable. Otherwise, the units variable is for labelling only. Changing it doesn't change the unit of the underlying variable. @@ -43,9 +46,9 @@ def plot_convergence(*data, units='kT', ax=None): if len(data) == 1 and isinstance(data[0], pd.DataFrame): dataframe = get_unit_converter(units)(data) forward = dataframe['Forward'].to_numpy() - forward_error = dataframe['F. Error'].to_numpy() + forward_error = dataframe['Forward_Error'].to_numpy() backward = dataframe['Backward'].to_numpy() - backward_error = dataframe['B. 
Error'].to_numpy() + backward_error = dataframe['Backward_Error'].to_numpy() else: try: forward, forward_error, backward, backward_error = data From d28158253a83a942e31bbe0b55a0608fcd098bbd Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Thu, 7 Oct 2021 20:16:06 +0100 Subject: [PATCH 045/123] Update test_convergence.py --- src/alchemlyb/tests/test_convergence.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/alchemlyb/tests/test_convergence.py b/src/alchemlyb/tests/test_convergence.py index 5f70c24a..f1b1a43c 100644 --- a/src/alchemlyb/tests/test_convergence.py +++ b/src/alchemlyb/tests/test_convergence.py @@ -13,7 +13,7 @@ def gmx_benzene(): def test_convergence_ti(gmx_benzene): dHdl, u_nk = gmx_benzene convergence = forward_backward_convergence(dHdl, 'TI') - assert convergence.shape == (10, 4) + assert convergence.shape == (10, 5) assert convergence.iloc[0, 0] == pytest.approx(3.07, 0.01) assert convergence.iloc[0, 2] == pytest.approx(3.11, 0.01) assert convergence.iloc[-1, 0] == pytest.approx(3.09, 0.01) @@ -22,7 +22,7 @@ def test_convergence_ti(gmx_benzene): def test_convergence_mbar(gmx_benzene): dHdl, u_nk = gmx_benzene convergence = forward_backward_convergence(u_nk, 'MBAR') - assert convergence.shape == (10, 4) + assert convergence.shape == (10, 5) assert convergence.iloc[0, 0] == pytest.approx(3.02, 0.01) assert convergence.iloc[0, 2] == pytest.approx(3.06, 0.01) assert convergence.iloc[-1, 0] == pytest.approx(3.05, 0.01) From f6ce6660866bb9bcaddf1497398f58cc0316b475 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Thu, 7 Oct 2021 20:24:38 +0100 Subject: [PATCH 046/123] update --- src/alchemlyb/convergence/convergence.py | 6 +++++- src/alchemlyb/visualisation/convergence.py | 11 +++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index 3e3ec682..db693c76 100644 --- a/src/alchemlyb/convergence/convergence.py +++ 
b/src/alchemlyb/convergence/convergence.py @@ -10,7 +10,11 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): ''' The forward and backward convergence of the free energy estimate. Generate the free energy change as a function of time in both - directions, with the specified number of points in the time. + directions, with the specified number of points in the time. For example, + setting `num` to 10 would give the forward convergence which is the free + energy estimate from the first 10%, 20%, 30% .. of the data. The + Backward would give the estimate from the last 10%, 20%, 30% .. of the + data. Parameters ---------- diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index a30585b5..1af944ac 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -8,6 +8,13 @@ def plot_convergence(*data, units='kT', ax=None): """Plot the forward and backward convergence. + The input could be the result from + :func:`~alchemlyb.convergence.forward_backward_convergence` or it could + be given explicitly as `forward`, `forward_error`, `backward`, + `backward_error`. These four array_like objects should have the same + shape and can be used as input for the + :func:`matplotlib.pyplot.errorbar`. + Parameters ---------- data : Dataframe or 4 array_like objects @@ -33,7 +40,7 @@ def plot_convergence(*data, units='kT', ax=None): If `data` is not an :class:pandas.Dataframe` produced by :func:`~alchemlyb.convergence.forward_backward_convergence`, - the unit will be adjusted accoridng to the units + the unit will be adjusted according to the units variable. Otherwise, the units variable is for labelling only. Changing it doesn't change the unit of the underlying variable. @@ -44,7 +51,7 @@ def plot_convergence(*data, units='kT', ax=None): .. 
versionadded:: 0.4.0 """ if len(data) == 1 and isinstance(data[0], pd.DataFrame): - dataframe = get_unit_converter(units)(data) + dataframe = get_unit_converter(units)(data[0]) forward = dataframe['Forward'].to_numpy() forward_error = dataframe['Forward_Error'].to_numpy() backward = dataframe['Backward'].to_numpy() From e9601fb95021425ac5f674019476193228fccf7d Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Thu, 7 Oct 2021 20:35:34 +0100 Subject: [PATCH 047/123] Update convergence.py --- src/alchemlyb/visualisation/convergence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index 1af944ac..a39ae76d 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -59,7 +59,7 @@ def plot_convergence(*data, units='kT', ax=None): else: try: forward, forward_error, backward, backward_error = data - except ValueError: + except ValueError: # pragma: no cover raise ValueError('Ensure all four of forward, forward_error, ' 'backward, backward_error are supplied.') From a4d379abd08c4bcef505a945725aa873ebf09e7b Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Thu, 7 Oct 2021 20:38:48 +0100 Subject: [PATCH 048/123] Update convergence.py --- src/alchemlyb/convergence/convergence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index db693c76..a4c48630 100644 --- a/src/alchemlyb/convergence/convergence.py +++ b/src/alchemlyb/convergence/convergence.py @@ -9,7 +9,7 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): ''' The forward and backward convergence of the free energy estimate. - Generate the free energy change as a function of time in both + Generate the free energy estimate as a function of time in both directions, with the specified number of points in the time. 
For example, setting `num` to 10 would give the forward convergence which is the free energy estimate from the first 10%, 20%, 30% .. of the data. The From 134036772ed3478d6c0974e41fb046aab6e2fbb0 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Fri, 8 Oct 2021 17:32:48 +0100 Subject: [PATCH 049/123] Update convergence.py --- src/alchemlyb/convergence/convergence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index a4c48630..4ece921c 100644 --- a/src/alchemlyb/convergence/convergence.py +++ b/src/alchemlyb/convergence/convergence.py @@ -45,7 +45,7 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): .. versionadded:: 0.6.0 ''' - logger = logging.getLogger('alchemlyb.postprocessors.' + logger = logging.getLogger('alchemlyb.convergence.' 'forward_backward_convergence') logger.info('Start convergence analysis.') logger.info('Check data availability.') From e08203559c4301def5263b5a7d88d4634f7fa411 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Fri, 8 Oct 2021 20:25:49 +0100 Subject: [PATCH 050/123] Update abfe.py --- src/alchemlyb/workflows/abfe.py | 196 ++++++-------------------------- 1 file changed, 36 insertions(+), 160 deletions(-) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 94e82ee4..accc3d7d 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -6,11 +6,12 @@ import logging from ..parsing import gmx -from ..preprocessing.subsampling import statistical_inefficiency +from ..preprocessing.subsampling import decorrelate_dhdl, decorrelate_u_nk from ..estimators import MBAR, BAR, TI from ..visualisation import (plot_mbar_overlap_matrix, plot_ti_dhdl, plot_dF_state, plot_convergence) from ..postprocessors.units import get_unit_converter +from ..convergence import forward_backward_convergence from .. import concat from .. 
import __version__ @@ -239,45 +240,7 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): # Find the starting frame u_nk = u_nk[u_nk.index.get_level_values('time') >= skiptime] - if uncorr == 'dhdl': - # Find the current column index - # Select the first row and remove the first column (Time) - key = u_nk.index.values[0][1:] - if len(key) > 1: - # Multiple keys - col = u_nk[key] - else: - # Single key - col = u_nk[key[0]] - subsample = statistical_inefficiency(u_nk, col, sort=True, - drop_duplicates=True) - # This part is commented out as it duplicates #98 - # The user could restore this part if it is desired. - - # elif uncorr == 'dhdl_all': - # subsample = statistical_inefficiency(u_nk, u_nk.sum(axis=1), - # sort = True, - # drop_duplicates = True) - # elif uncorr == 'dE': - # # Using the same logic as alchemical-analysis - # key = u_nk.index.values[0][1:] - # index = u_nk.columns.values.tolist().index(key) - # # for the state that is not the last state, take the state+1 - # if index + 1 < len(u_nk.columns): - # subsample = statistical_inefficiency( - # u_nk, u_nk.iloc[:, index + 1]) - # # for the state that is the last state, take the state-1 - # else: - # subsample = statistical_inefficiency( - # u_nk, u_nk.iloc[:, index - 1], - # sort = True, - # drop_duplicates = True) - - else: # pragma: no cover - # The dhdl_all and dE will be implemented here when #48 is - # merged - raise NameError( - 'Decorrelation method {} not found.'.format(uncorr)) + subsample = decorrelate_u_nk(u_nk, uncorr) if len(subsample) < threshold: self.logger.warning('Number of u_nk {} for state {} is ' @@ -296,9 +259,7 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): self.dHdl_sample_list = [] for index, dHdl in enumerate(self.dHdl_list): dHdl = dHdl[dHdl.index.get_level_values('time') >= skiptime] - subsample = statistical_inefficiency(dHdl, dHdl.sum(axis=1), - sort=True, - drop_duplicates=True) + subsample = decorrelate_dhdl(dHdl) if len(subsample) < 
threshold: self.logger.warning('Number of dHdl {} for state {} is ' 'less than the threshold {}.'.format( @@ -573,7 +534,9 @@ def plot_dF_state(self, dF_state='dF_state.pdf', labels=None, colors=None, def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', ax=None): - '''Compute the forward and backward convergence and plotted with + '''Compute the forward and backward convergence using + :func:`~alchemlyb.convergence.forward_backward_convergence`and + plotted with :func:`~alchemlyb.visualisation.plot_convergence`. Parameters @@ -593,19 +556,6 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', Attributes ---------- convergence : DataFrame - The DataFrame with convergence data. :: - - Forward (kT) F. Error (kT) Backward (kT) B. Error (kT) - 0 33.988935 0.334676 35.666128 0.324426 - 1 35.075489 0.232150 35.382850 0.230944 - 2 34.919988 0.190424 35.156028 0.189489 - 3 34.929927 0.165316 35.242255 0.164400 - 4 34.957007 0.147852 35.247704 0.147191 - 5 35.003660 0.134952 35.214658 0.134458 - 6 35.070199 0.124956 35.178422 0.124664 - 7 35.019853 0.116970 35.096870 0.116783 - 8 35.035123 0.110147 35.225907 0.109742 - 9 35.113417 0.104280 35.113417 0.104280 Returns ------- @@ -615,115 +565,41 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', self.logger.info('Start convergence analysis.') self.logger.info('Check data availability.') - try: - dHdl_list = self.dHdl_sample_list - self.logger.info('Subsampled dHdl is available.') - except AttributeError: + if estimator.lower() in ['mbar', 'bar']: try: - dHdl_list = self.dHdl_list - self.logger.info('Subsampled dHdl not available, ' - 'use original data instead.') - except AttributeError: # pragma: no cover - self.logger.warning('dHdl is not available.') - - try: - u_nk_list = self.u_nk_sample_list - self.logger.info('Subsampled u_nk is available.') - except AttributeError: + u_nk_list = self.u_nk_sample_list + self.logger.info('Subsampled u_nk is available.') + 
except AttributeError: + try: + u_nk_list = self.u_nk_list + self.logger.info('Subsampled u_nk not available, ' + 'use original data instead.') + except AttributeError: # pragma: no cover + self.logger.warning('u_nk is not available.') + convergence = forward_backward_convergence(u_nk_list, + estimator=estimator, + num=forwrev) + else: try: - u_nk_list = self.u_nk_list - self.logger.info('Subsampled u_nk not available, ' - 'use original data instead.') - except AttributeError: # pragma: no cover - self.logger.warning('u_nk is not available.') - - if estimator.lower() == 'mbar': - self.logger.info('Use MBAR estimator for convergence analysis.') - estimator_fit = MBAR().fit - elif estimator.lower() == 'bar': - self.logger.info('Use BAR estimator for convergence analysis.') - estimator_fit = BAR().fit - elif estimator.lower() == 'ti': - self.logger.info('Use TI estimator for convergence analysis.') - estimator_fit = TI().fit - else: # pragma: no cover - self.logger.warning( - '{} is not a valid estimator.'.format(estimator)) - - converter = get_unit_converter(self.units) + dHdl_list = self.dHdl_sample_list + self.logger.info('Subsampled dHdl is available.') + except AttributeError: + try: + dHdl_list = self.dHdl_list + self.logger.info('Subsampled dHdl not available, ' + 'use original data instead.') + except AttributeError: # pragma: no cover + self.logger.warning('dHdl is not available.') + convergence = forward_backward_convergence(dHdl_list, + estimator=estimator, + num=forwrev) + + self.convergence = get_unit_converter(self.units)(convergence) - self.logger.info('Begin forward analysis') - forward_list = [] - forward_error_list = [] - for i in range(1, forwrev + 1): - self.logger.info('Forward analysis: {:.2f}%'.format(i / forwrev)) - sample = [] - if estimator.lower() in ['mbar', 'bar']: - for data in u_nk_list: - sample.append(data[:len(data) // forwrev * i]) - elif estimator.lower() == 'ti': - for data in dHdl_list: - sample.append(data[:len(data) // forwrev * 
i]) - else: # pragma: no cover - raise NameError( - '{} is not a valid estimator.'.format(estimator)) - sample = concat(sample) - result = estimator_fit(sample) - forward_list.append(converter(result.delta_f_).iloc[0, -1]) - if estimator.lower() == 'bar': - error = np.sqrt(sum( - [converter(result.d_delta_f_).iloc[i, i + 1] ** 2 - for i in range(len(result.d_delta_f_) - 1)])) - forward_error_list.append(error) - else: - forward_error_list.append(converter(result.d_delta_f_).iloc[ - 0, -1]) - self.logger.info('{:.2f} +/- {:.2f} kT'.format(forward_list[-1], - forward_error_list[-1])) - - self.logger.info('Begin backward analysis') - backward_list = [] - backward_error_list = [] - for i in range(1, forwrev + 1): - self.logger.info('Backward analysis: {:.2f}%'.format(i / forwrev)) - sample = [] - if estimator.lower() in ['mbar', 'bar']: - for data in u_nk_list: - sample.append(data[-len(data) // forwrev * i:]) - elif estimator.lower() == 'ti': - for data in dHdl_list: - sample.append(data[-len(data) // forwrev * i:]) - else: # pragma: no cover - raise NameError( - '{} is not a valid estimator.'.format(estimator)) - sample = concat(sample) - result = estimator_fit(sample) - backward_list.append(converter(result.delta_f_).iloc[0, -1]) - if estimator.lower() == 'bar': - error = np.sqrt(sum( - [converter(result.d_delta_f_).iloc[i, i + 1] ** 2 - for i in range(len(result.d_delta_f_) - 1)])) - backward_error_list.append(error) - else: - backward_error_list.append(converter( - result.d_delta_f_).iloc[0, -1]) - self.logger.info('{:.2f} +/- {:.2f} kT'.format(backward_list[-1], - backward_error_list[-1])) - - convergence = pd.DataFrame( - {'Forward ({})'.format(self.units): forward_list, - 'F. Error ({})'.format(self.units): forward_error_list, - 'Backward ({})'.format(self.units): backward_list, - 'B. Error ({})'.format(self.units): backward_error_list}) - - self.convergence = convergence self.logger.info('Plot convergence analysis to {} under {}.' 
''.format(dF_t, self.out)) - ax = plot_convergence(np.array(forward_list), - np.array(forward_error_list), - np.array(backward_list), - np.array(backward_error_list), + ax = plot_convergence(self.convergence, units=self.units, ax=ax) ax.figure.savefig(join(self.out, dF_t)) return ax From e7ce4fb8ac6ea0c917c261c7a6c57ba9ac5201d1 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Fri, 8 Oct 2021 21:25:31 +0100 Subject: [PATCH 051/123] update --- src/alchemlyb/tests/test_workflow_ABFE.py | 34 +----- src/alchemlyb/workflows/abfe.py | 128 ++++++++++++++-------- 2 files changed, 86 insertions(+), 76 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index c9acd6cd..cd031118 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -17,7 +17,7 @@ def workflow(): prefix='dhdl', suffix='xvg', T=310, skiptime=10, uncorr='dhdl', threshold=50, methods=('mbar', 'bar', 'ti'), out='./', - resultfilename='result.out', overlap='O_MBAR.pdf', + overlap='O_MBAR.pdf', breakdown=True, forwrev=10, log='result.log') return workflow @@ -42,13 +42,6 @@ def test_estomator(self, workflow): assert 'ti' in workflow.estimator assert 'bar' in workflow.estimator - def test_write(self, workflow): - '''test if the result has been written.''' - with open('result.out', 'r') as f: - text = f.read() - assert len(text.split('\n')) == 37 - os.remove('result.out') - def test_O_MBAR(self, workflow): '''test if the O_MBAR.pdf has been plotted.''' assert os.path.isfile('O_MBAR.pdf') @@ -85,7 +78,6 @@ def workflow(): workflow.update_units('kcal/mol') workflow.preprocess(skiptime=10, uncorr='dhdl', threshold=50) workflow.estimate(methods=('mbar', 'bar', 'ti')) - workflow.write(resultfilename='result.out') workflow.plot_overlap_matrix(overlap='O_MBAR.pdf') workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') workflow.plot_dF_state(dF_state='dF_state.pdf') @@ -113,13 +105,6 @@ def test_estomator(self, workflow): assert 
'ti' in workflow.estimator assert 'bar' in workflow.estimator - def test_write(self, workflow): - '''test if the result has been written.''' - with open('result.out', 'r') as f: - text = f.read() - assert len(text.split('\n')) == 37 - os.remove('result.out') - def test_O_MBAR(self, workflow): '''test if the O_MBAR.pdf has been plotted.''' assert os.path.isfile('O_MBAR.pdf') @@ -170,7 +155,7 @@ def workflow(): prefix='dhdl', suffix='bz2', T=310, skiptime=0, uncorr='dhdl', threshold=50, methods=('mbar', 'bar', 'ti'), out='./', - resultfilename='result.out', overlap='O_MBAR.pdf', + overlap='O_MBAR.pdf', breakdown=True, forwrev=10, log='result.log') return workflow @@ -188,13 +173,6 @@ def test_estomator(self, workflow): assert 'ti' in workflow.estimator assert 'bar' in workflow.estimator - def test_write(self, workflow): - '''test if the result has been written.''' - with open('result.out', 'r') as f: - text = f.read() - assert len(text.split('\n')) == 10 - os.remove('result.out') - def test_O_MBAR(self, workflow): '''test if the O_MBAR.pdf has been plotted.''' assert os.path.isfile('O_MBAR.pdf') @@ -249,19 +227,11 @@ def workflow(): dHdl.set_index('bound-lambda', append=True, inplace=True) workflow.estimate(methods=('ti', )) - workflow.write(resultfilename='result.out') workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') workflow.plot_dF_state(dF_state='dF_state.pdf') workflow.check_convergence(10, dF_t='dF_t.pdf', estimator='ti') return workflow - def test_write(self, workflow): - '''test if the result has been written.''' - with open('result.out', 'r') as f: - text = f.read() - assert len(text.split('\n')) == 11 - os.remove('result.out') - def test_dhdl_TI(self, workflow): '''test if the dhdl_TI.pdf has been plotted.''' assert os.path.isfile('dhdl_TI.pdf') diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index accc3d7d..5cdb52b9 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -50,8 +50,6 @@ class ABFE(): 
out : str Directory in which the output files produced by this script will be stored. Default: os.path.curdir. - resultfilename : str - custom defined result filename. Default: None. (not writing the result) overlap : str The filename for the plot of overlap matrix. Default: None. (not plotting). @@ -82,7 +80,7 @@ class ABFE(): ''' def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, prefix='dhdl', suffix='xvg', T=298, skiptime=0, uncorr=None, - threshold=50, methods=None, out=os.path.curdir, resultfilename=None, + threshold=50, methods=None, out=os.path.curdir, overlap=None, breakdown=None, forwrev=None, log='result.log'): @@ -162,9 +160,6 @@ def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, if methods is not None: self.estimate(methods) - if resultfilename is not None: - self.write(resultfilename=resultfilename) - if overlap is not None: self.plot_overlap_matrix(overlap) @@ -330,30 +325,63 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): self.logger.warning( '{} is not a valid estimator.'.format(estimator)) - def write(self, resultfilename='result.out'): - '''Write the result into a text file. + def generate_result(self): + '''Summarise the result into a dataframe. + + Returns + ------- + DataFrame + The DataFrame with convergence data. 
:: + + MBAR MBAR_Error BAR BAR_Error TI TI_Error + States 0 -- 1 0.065967 0.001293 0.066544 0.001661 0.066663 0.001675 + 1 -- 2 0.089774 0.001398 0.089303 0.002101 0.089566 0.002144 + 2 -- 3 0.132036 0.001638 0.132687 0.002990 0.133292 0.003055 + 3 -- 4 0.116494 0.001213 0.116348 0.002691 0.116845 0.002750 + 4 -- 5 0.105251 0.000980 0.106344 0.002337 0.106603 0.002362 + 5 -- 6 0.349320 0.002781 0.343399 0.006839 0.350568 0.007393 + 6 -- 7 0.402346 0.002767 0.391368 0.006641 0.395754 0.006961 + 7 -- 8 0.322284 0.002058 0.319395 0.005333 0.321542 0.005434 + 8 -- 9 0.434999 0.002683 0.425680 0.006823 0.430251 0.007155 + 9 -- 10 0.355672 0.002219 0.350564 0.005472 0.352745 0.005591 + 10 -- 11 3.574227 0.008744 3.513595 0.018711 3.514790 0.018078 + 11 -- 12 2.896685 0.009905 2.821760 0.017844 2.823210 0.018088 + 12 -- 13 2.223769 0.011229 2.188885 0.018438 2.189784 0.018478 + 13 -- 14 1.520978 0.012526 1.493598 0.019155 1.490070 0.019288 + 14 -- 15 0.911279 0.009527 0.894878 0.015023 0.896010 0.015140 + 15 -- 16 0.892365 0.010558 0.886706 0.015260 0.884698 0.015392 + 16 -- 17 1.737971 0.025315 1.720643 0.031416 1.741028 0.030624 + 17 -- 18 1.790706 0.025560 1.788112 0.029435 1.801695 0.029244 + 18 -- 19 1.998635 0.023340 2.007404 0.027447 2.019213 0.027096 + 19 -- 20 2.263475 0.020286 2.265322 0.025023 2.282040 0.024566 + 20 -- 21 2.565680 0.016695 2.561324 0.023611 2.552977 0.023753 + 21 -- 22 1.384094 0.007553 1.385837 0.011672 1.381999 0.011991 + 22 -- 23 1.428567 0.007504 1.422689 0.012524 1.416010 0.013012 + 23 -- 24 1.440581 0.008059 1.412517 0.013125 1.408267 0.013539 + 24 -- 25 1.411329 0.009022 1.419167 0.013356 1.411446 0.013795 + 25 -- 26 1.340320 0.010167 1.360679 0.015213 1.356953 0.015260 + 26 -- 27 1.243745 0.011239 1.245873 0.015711 1.248959 0.015762 + 27 -- 28 1.128429 0.012859 1.124554 0.016999 1.121892 0.016962 + 28 -- 29 1.010313 0.016442 1.005444 0.017692 1.019747 0.017257 + Stages coul 10.215658 0.033903 10.017838 0.041839 10.017854 0.048744 + vdw 
22.547489 0.098699 22.501150 0.060092 22.542936 0.106723 + bonded 2.374144 0.014995 2.341631 0.005507 2.363828 0.021078 + TOTAL 35.137291 0.103580 34.860619 0.087022 34.924618 0.119206 - Parameters - ---------- - resultfilename : str - A list of the methods to esitimate the free energy with. Default: - ['TI', 'BAR', 'MBAR']. ''' # Write estimate - self.logger.info('Write the estimate as txt file to {} under {} ' - 'with unit {}.'.format( - resultfilename, self.out, self.units)) + self.logger.info('Summarise the estimate into a dataframe.') # Make the header name - self.logger.info('Write the header names.') - result_out = [['------------', ], - [' States ', ], - ['------------', ],] + self.logger.info('Generate the row names.') eitimator_names = list(self.estimator.keys()) num_states = len(self.estimator[eitimator_names[0]].states_) + data_dict = {'name': [], + 'state': []} for i in range(num_states - 1): - result_out.append([str(i).rjust(4) + ' -- ' + str(i+1).ljust(4), ]) - result_out.append(['------------', ]) + data_dict['name'].append(str(i) + ' -- ' + str(i+1)) + data_dict['state'].append('States') + try: u_nk = self.u_nk_list[0] stages = u_nk.reset_index('time').index.names @@ -367,31 +395,34 @@ def write(self, resultfilename='result.out'): stages = [] self.logger.warning('No stage name found in dHdl or u_nk') for stage in stages: - result_out.append([stage.split('-')[0][:9].rjust(9)+': ', ]) - result_out.append(['TOTAL'.rjust(9) + ': ', ]) + data_dict['name'].append(stage.split('-')[0]) + data_dict['state'].append('Stages') + data_dict['name'].append('TOTAL') + data_dict['state'].append('Stages') converter = get_unit_converter(self.units) + col_names = [] for estimator_name, estimator in self.estimator.items(): - self.logger.info('write the result from estimator {}'.format( + self.logger.info('Read the results from estimator {}'.format( estimator_name)) # Do the unit conversion delta_f_ = converter(estimator.delta_f_) d_delta_f_ = 
converter(estimator.d_delta_f_) # Write the estimator header - result_out[0].append('---------------------') - result_out[1].append('{} ({}) '.format( - estimator_name.upper(), self.units).rjust(21)) - result_out[2].append('---------------------') - for index in range(1, num_states): - result_out[2+index].append('{:.3f} +- {:.3f}'.format( - delta_f_.iloc[index-1, index], - d_delta_f_.iloc[index-1, index] - ).rjust(21)) - result_out[2+num_states].append('---------------------') + col_names.append(estimator_name.upper()) + col_names.append(estimator_name.upper() + '_Error') + data_dict[estimator_name.upper()] = [] + data_dict[estimator_name.upper() + '_Error'] = [] + for index in range(1, num_states): + data_dict[estimator_name.upper()].append( + delta_f_.iloc[index-1, index]) + data_dict[estimator_name.upper() + '_Error'].append( + d_delta_f_.iloc[index - 1, index]) - self.logger.info('write the staged result from estimator {}'.format( + self.logger.info('Generate the staged result from estimator {' + '}'.format( estimator_name)) for index, stage in enumerate(stages): if len(stages) == 1: @@ -423,8 +454,8 @@ def write(self, resultfilename='result.out'): error = np.sqrt(sum( [d_delta_f_.iloc[start, start+1]**2 for i in range(start, end + 1)])) - result_out[3 + num_states + index].append( - '{:.3f} +- {:.3f}'.format(result, error,).rjust(21)) + data_dict[estimator_name.upper()].append(result) + data_dict[estimator_name.upper() + '_Error'].append(error) # Total result result = delta_f_.iloc[0, -1] @@ -434,12 +465,21 @@ def write(self, resultfilename='result.out'): error = np.sqrt(sum( [d_delta_f_.iloc[i, i + 1] ** 2 for i in range(num_states - 1)])) - result_out[3 + num_states + len(stages)].append( - '{:.3f} +- {:.3f}'.format(result, error, ).rjust(21)) - self.logger.info('Write results:\n'+ - '\n'.join([' '.join(line) for line in result_out])) - with open(join(self.out, resultfilename), 'w') as f: - f.write('\n'.join([' '.join(line) for line in result_out])) + 
data_dict[estimator_name.upper()].append(result) + data_dict[estimator_name.upper() + '_Error'].append(error) + summary = pd.DataFrame.from_dict(data_dict) + + summary = summary.set_index(['state', 'name']) + # Make sure that the columns are in the right order + summary = summary[col_names] + # Remove the name of the index column to make it prettier + summary.index.names = [None, None] + + summary.attrs = estimator.delta_f_.attrs + + self.summary = summary + self.logger.info('Write results:\n{}'.format(summary.to_string())) + return summary def plot_overlap_matrix(self, overlap='O_MBAR.pdf', ax=None): '''Plot the overlap matrix for MBAR estimator using From 3cbd0169c3e03235a1780a6f0e026a6395bbc087 Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Mon, 18 Oct 2021 14:11:55 +0100 Subject: [PATCH 052/123] Update abfe.py --- src/alchemlyb/workflows/abfe.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 5cdb52b9..40dc87a5 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -161,16 +161,22 @@ def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, self.estimate(methods) if overlap is not None: - self.plot_overlap_matrix(overlap) + ax = self.plot_overlap_matrix(overlap) + plt.close(ax.figure) if breakdown: - self.plot_ti_dhdl() - self.plot_dF_state() - self.plot_dF_state(dF_state='dF_state_long.pdf', - orientation='landscape') + ax = self.plot_ti_dhdl() + plt.close(ax.figure) + fig = self.plot_dF_state() + plt.close(fig) + fig = self.plot_dF_state(dF_state='dF_state_long.pdf', + orientation='landscape') + plt.close(fig) if forwrev is not None: - self.check_convergence(forwrev, estimator='mbar', dF_t='dF_t.pdf') + ax = self.check_convergence(forwrev, estimator='mbar', dF_t='dF_t.pdf') + plt.close(ax.figure) + def update_units(self, units): '''Update the plot and text output to the selected unit. 
@@ -400,15 +406,14 @@ def generate_result(self): data_dict['name'].append('TOTAL') data_dict['state'].append('Stages') - converter = get_unit_converter(self.units) col_names = [] for estimator_name, estimator in self.estimator.items(): self.logger.info('Read the results from estimator {}'.format( estimator_name)) # Do the unit conversion - delta_f_ = converter(estimator.delta_f_) - d_delta_f_ = converter(estimator.d_delta_f_) + delta_f_ = estimator.delta_f_ + d_delta_f_ = estimator.d_delta_f_ # Write the estimator header col_names.append(estimator_name.upper()) @@ -476,7 +481,8 @@ def generate_result(self): summary.index.names = [None, None] summary.attrs = estimator.delta_f_.attrs - + converter = get_unit_converter(self.units) + summary = converter(summary) self.summary = summary self.logger.info('Write results:\n{}'.format(summary.to_string())) return summary @@ -540,6 +546,7 @@ def plot_ti_dhdl(self, dhdl_TI='dhdl_TI.pdf', labels=None, colors=None, ax.figure.savefig(join(self.out, dhdl_TI)) self.logger.info('Plot TI dHdl to {} under {}.' ''.format(dhdl_TI, self.out)) + return ax def plot_dF_state(self, dF_state='dF_state.pdf', labels=None, colors=None, orientation='portrait', nb=10): @@ -571,6 +578,7 @@ def plot_dF_state(self, dF_state='dF_state.pdf', labels=None, colors=None, fig.savefig(join(self.out, dF_state)) self.logger.info('Plot dF state to {} under {}.' 
''.format(dF_state, self.out)) + return fig def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', ax=None): From b70e68c124ee9af5668fb69686ce21f10c9e394c Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Mon, 18 Oct 2021 14:49:33 +0100 Subject: [PATCH 053/123] Update abfe.py --- src/alchemlyb/workflows/abfe.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 40dc87a5..84b55a3f 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -4,6 +4,7 @@ import pandas as pd import numpy as np import logging +import matplotlib.pyplot as plt from ..parsing import gmx from ..preprocessing.subsampling import decorrelate_dhdl, decorrelate_u_nk From c8b7f66f027266f5f69d3cf9745bf81b2332a776 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Fri, 22 Oct 2021 16:57:42 +0100 Subject: [PATCH 054/123] Update convergence.py --- src/alchemlyb/visualisation/convergence.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index 8ece8ddf..13e4ea34 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -53,9 +53,6 @@ def plot_convergence(*data, units='kT', ax=None): Changing it doesn't change the unit of the underlying variable. - .. versionchanged:: 0.6.0 - data now takes in dataframe - .. 
versionchanged:: 0.6.0 data now takes in dataframe From 24298a4e16632634ce59ec9703e1e84970690652 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 30 Oct 2021 11:12:44 +0100 Subject: [PATCH 055/123] Update abfe.py --- src/alchemlyb/workflows/abfe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 84b55a3f..bb82fe41 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -8,7 +8,8 @@ from ..parsing import gmx from ..preprocessing.subsampling import decorrelate_dhdl, decorrelate_u_nk -from ..estimators import MBAR, BAR, TI +from ..estimators import BAR, TI +from ..estimators import AutoMBAR as MBAR from ..visualisation import (plot_mbar_overlap_matrix, plot_ti_dhdl, plot_dF_state, plot_convergence) from ..postprocessors.units import get_unit_converter From 73b781e5a0f15033e7bd1e32ac566c968cac9a4c Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 30 Oct 2021 11:20:17 +0100 Subject: [PATCH 056/123] Update convergence.py --- src/alchemlyb/convergence/convergence.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index 4ece921c..b126f47c 100644 --- a/src/alchemlyb/convergence/convergence.py +++ b/src/alchemlyb/convergence/convergence.py @@ -2,7 +2,8 @@ import logging import numpy as np -from ..estimators import MBAR, BAR, TI +from ..estimators import BAR, TI +from ..estimators import AutoMBAR as MBAR from .. 
import concat From e93d3bfd8b3bc7279b805ab7fd6569cfdd85a149 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 30 Oct 2021 20:49:26 +0100 Subject: [PATCH 057/123] update --- docs/workflow.rst | 57 ++++++++++++++++------- src/alchemlyb/tests/test_workflow_ABFE.py | 29 ++++++++++++ src/alchemlyb/workflows/__init__.py | 2 +- src/alchemlyb/workflows/abfe.py | 3 ++ 4 files changed, 72 insertions(+), 19 deletions(-) diff --git a/docs/workflow.rst b/docs/workflow.rst index f6c470d9..7a9e8a6d 100644 --- a/docs/workflow.rst +++ b/docs/workflow.rst @@ -31,24 +31,43 @@ could be excuted with a single line of command. :: This would give the free energy estimate using all of :class:`~alchemlyb.estimators.TI`, :class:`~alchemlyb.estimators.BAR`, -:class:`~alchemlyb.estimators.MBAR` and the result will be written to the text -file `result.out`. :: - - ------------ --------------------- --------------------- --------------------- - States MBAR (kcal/mol) BAR (kcal/mol) TI (kcal/mol) - ------------ --------------------- --------------------- --------------------- - 0 -- 1 0.041 +- 0.001 0.041 +- 0.001 0.041 +- 0.001 - 1 -- 2 0.056 +- 0.001 0.055 +- 0.001 0.056 +- 0.001 - 2 -- 3 0.082 +- 0.001 0.082 +- 0.002 0.083 +- 0.002 - ... 
- 26 -- 27 0.766 +- 0.007 0.768 +- 0.010 0.770 +- 0.010 - 27 -- 28 0.694 +- 0.008 0.691 +- 0.011 0.690 +- 0.010 - 28 -- 29 0.620 +- 0.010 0.616 +- 0.011 0.625 +- 0.011 - ------------ --------------------- --------------------- --------------------- - coul: 6.290 +- 0.021 6.168 +- 0.026 6.168 +- 0.030 - vdw: 13.872 +- 0.061 13.852 +- 0.037 13.877 +- 0.066 - bonded: 1.469 +- 0.009 1.447 +- 0.003 1.461 +- 0.013 - TOTAL: 21.631 +- 0.064 21.467 +- 0.054 21.506 +- 0.074 +:class:`~alchemlyb.estimators.MBAR` and the result will be given as +pandas dataframe to :attr:`alchemlyb.workflows.ABFE.summary` :: + + MBAR MBAR_Error BAR BAR_Error TI TI_Error + States 0 -- 1 0.065967 0.001293 0.066544 0.001661 0.066663 0.001675 + 1 -- 2 0.089774 0.001398 0.089303 0.002101 0.089566 0.002144 + 2 -- 3 0.132036 0.001638 0.132687 0.002990 0.133292 0.003055 + 3 -- 4 0.116494 0.001213 0.116348 0.002691 0.116845 0.002750 + 4 -- 5 0.105251 0.000980 0.106344 0.002337 0.106603 0.002362 + 5 -- 6 0.349320 0.002781 0.343399 0.006839 0.350568 0.007393 + 6 -- 7 0.402346 0.002767 0.391368 0.006641 0.395754 0.006961 + 7 -- 8 0.322284 0.002058 0.319395 0.005333 0.321542 0.005434 + 8 -- 9 0.434999 0.002683 0.425680 0.006823 0.430251 0.007155 + 9 -- 10 0.355672 0.002219 0.350564 0.005472 0.352745 0.005591 + 10 -- 11 3.574227 0.008744 3.513595 0.018711 3.514790 0.018078 + 11 -- 12 2.896685 0.009905 2.821760 0.017844 2.823210 0.018088 + 12 -- 13 2.223769 0.011229 2.188885 0.018438 2.189784 0.018478 + 13 -- 14 1.520978 0.012526 1.493598 0.019155 1.490070 0.019288 + 14 -- 15 0.911279 0.009527 0.894878 0.015023 0.896010 0.015140 + 15 -- 16 0.892365 0.010558 0.886706 0.015260 0.884698 0.015392 + 16 -- 17 1.737971 0.025315 1.720643 0.031416 1.741028 0.030624 + 17 -- 18 1.790706 0.025560 1.788112 0.029435 1.801695 0.029244 + 18 -- 19 1.998635 0.023340 2.007404 0.027447 2.019213 0.027096 + 19 -- 20 2.263475 0.020286 2.265322 0.025023 2.282040 0.024566 + 20 -- 21 2.565680 0.016695 2.561324 0.023611 2.552977 
0.023753 + 21 -- 22 1.384094 0.007553 1.385837 0.011672 1.381999 0.011991 + 22 -- 23 1.428567 0.007504 1.422689 0.012524 1.416010 0.013012 + 23 -- 24 1.440581 0.008059 1.412517 0.013125 1.408267 0.013539 + 24 -- 25 1.411329 0.009022 1.419167 0.013356 1.411446 0.013795 + 25 -- 26 1.340320 0.010167 1.360679 0.015213 1.356953 0.015260 + 26 -- 27 1.243745 0.011239 1.245873 0.015711 1.248959 0.015762 + 27 -- 28 1.128429 0.012859 1.124554 0.016999 1.121892 0.016962 + 28 -- 29 1.010313 0.016442 1.005444 0.017692 1.019747 0.017257 + Stages coul 10.215658 0.033903 10.017838 0.041839 10.017854 0.048744 + vdw 22.547489 0.098699 22.501150 0.060092 22.542936 0.106723 + bonded 2.374144 0.014995 2.341631 0.005507 2.363828 0.021078 + TOTAL 35.137291 0.103580 34.860619 0.087022 34.924618 0.119206 The :ref:`overlay matrix for the MBAR estimator ` will be plotted and saved to `O_MBAR.pdf`. @@ -98,6 +117,8 @@ to the data generated at each stage of the analysis. :: >>> workflow.plot_dF_state(dF_state='dF_state.pdf') >>> # Convergence analysis >>> workflow.check_convergence(10, dF_t='dF_t.pdf') + >>> # Generate the results + >>> summary = workflow.generate_result() .. 
currentmodule:: alchemlyb.workflows.ABFE diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index cd031118..cb42c2fd 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -42,6 +42,11 @@ def test_estomator(self, workflow): assert 'ti' in workflow.estimator assert 'bar' in workflow.estimator + def test_summary(self, workflow): + '''Test if if the summary is right.''' + summary = workflow.generate_result() + assert np.isclose(summary['MBAR']['Stages']['TOTAL'], 21.788, 0.1) + def test_O_MBAR(self, workflow): '''test if the O_MBAR.pdf has been plotted.''' assert os.path.isfile('O_MBAR.pdf') @@ -126,6 +131,30 @@ def test_convergence(self, workflow): os.remove('dF_t.pdf') assert len(workflow.convergence) == 10 + def test_convergence_nosample_u_nk(self, workflow): + '''test if the dF_state.pdf has been plotted.''' + u_nk_sample_list = workflow.u_nk_sample_list + delattr(workflow, 'u_nk_sample_list') + workflow.check_convergence(10) + os.remove('dF_t.pdf') + assert len(workflow.convergence) == 10 + workflow.u_nk_sample_list = u_nk_sample_list + + def test_convergence_nosample_dhdl(self, workflow): + '''test if the dF_state.pdf has been plotted.''' + dHdl_sample_list = workflow.dHdl_sample_list + delattr(workflow, 'dHdl_sample_list') + workflow.check_convergence(10, estimator='ti') + os.remove('dF_t.pdf') + assert len(workflow.convergence) == 10 + workflow.dHdl_sample_list = dHdl_sample_list + + def test_convergence_dhdl(self, workflow): + '''test if the dF_state.pdf has been plotted.''' + workflow.check_convergence(10, estimator='ti') + os.remove('dF_t.pdf') + assert len(workflow.convergence) == 10 + def test_convergence_TI(self, workflow): '''test if the dF_state.pdf has been plotted.''' workflow.check_convergence(10, estimator='ti', dF_t='dF_t.pdf') diff --git a/src/alchemlyb/workflows/__init__.py b/src/alchemlyb/workflows/__init__.py index 9074a415..24e6ab7d 100644 --- 
a/src/alchemlyb/workflows/__init__.py +++ b/src/alchemlyb/workflows/__init__.py @@ -1 +1 @@ -from .abfe import ABFE \ No newline at end of file +from .abfe import ABFE diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index bb82fe41..b0b59a07 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -79,6 +79,8 @@ class ABFE(): The list of u_nk read from the files. dHdl_list : list The list of dHdl read from the files. + summary : Dataframe + The summary of the free energy estimate. ''' def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, prefix='dhdl', suffix='xvg', T=298, skiptime=0, uncorr=None, @@ -161,6 +163,7 @@ def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, threshold=threshold) if methods is not None: self.estimate(methods) + self.generate_result() if overlap is not None: ax = self.plot_overlap_matrix(overlap) From 281a8648251f66bd0adbd5bbe2ad7b3b085f78a2 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 30 Oct 2021 21:00:38 +0100 Subject: [PATCH 058/123] update --- docs/workflow.rst | 17 ++++++++--------- src/alchemlyb/workflows/abfe.py | 17 +++++------------ 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/docs/workflow.rst b/docs/workflow.rst index 7a9e8a6d..dfc514d6 100644 --- a/docs/workflow.rst +++ b/docs/workflow.rst @@ -2,7 +2,7 @@ Automatic workflow ================== Though **alchemlyb** is a library offering great flexibility in deriving free energy estimate, it also provide a easy pipeline that is similar to -`Alchemical Analysis `_ and a +`Alchemical Analysis `_ and a step-by-step version that allows more flexibility. Note @@ -12,7 +12,7 @@ This is an experimental feature and is not API stable. Fully Automatic analysis ------------------------ A interface similar to -`Alchemical Analysis `_ +`Alchemical Analysis `_ could be excuted with a single line of command. 
:: >>> import os @@ -26,13 +26,13 @@ could be excuted with a single line of command. :: >>> prefix='dhdl', suffix='xvg', T=298, skiptime=10, >>> uncorr='dhdl', threshold=50, >>> methods=('mbar', 'bar', 'ti'), out='./', - >>> resultfilename='result.out', overlap='O_MBAR.pdf', + >>> overlap='O_MBAR.pdf', >>> breakdown=True, forwrev=10, log='result.log') This would give the free energy estimate using all of :class:`~alchemlyb.estimators.TI`, :class:`~alchemlyb.estimators.BAR`, :class:`~alchemlyb.estimators.MBAR` and the result will be given as -pandas dataframe to :attr:`alchemlyb.workflows.ABFE.summary` :: +:class:`pandas.DataFrame` to :attr:`alchemlyb.workflows.ABFE.summary` :: MBAR MBAR_Error BAR BAR_Error TI TI_Error States 0 -- 1 0.065967 0.001293 0.066544 0.001661 0.066663 0.001675 @@ -107,8 +107,8 @@ to the data generated at each stage of the analysis. :: >>> workflow.preprocess(skiptime=10, uncorr='dhdl', threshold=50) >>> # Run the estimator >>> workflow.estimate(methods=('mbar', 'bar', 'ti')) - >>> # write the result - >>> workflow.write(resultfilename='result.out') + >>> # Generate the results + >>> summary = workflow.generate_result() >>> # Plot the overlap matrix >>> workflow.plot_overlap_matrix(overlap='O_MBAR.pdf') >>> # Plot the dHdl for TI @@ -117,8 +117,7 @@ to the data generated at each stage of the analysis. :: >>> workflow.plot_dF_state(dF_state='dF_state.pdf') >>> # Convergence analysis >>> workflow.check_convergence(10, dF_t='dF_t.pdf') - >>> # Generate the results - >>> summary = workflow.generate_result() + .. currentmodule:: alchemlyb.workflows.ABFE @@ -126,7 +125,7 @@ to the data generated at each stage of the analysis. :: .. autofunction:: update_units .. autofunction:: preprocess .. autofunction:: estimate -.. autofunction:: write +.. autofunction:: generate_result .. autofunction:: plot_overlap_matrix .. autofunction:: plot_ti_dhdl .. 
autofunction:: plot_dF_state diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index b0b59a07..64282b69 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -79,8 +79,6 @@ class ABFE(): The list of u_nk read from the files. dHdl_list : list The list of dHdl read from the files. - summary : Dataframe - The summary of the free energy estimate. ''' def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, prefix='dhdl', suffix='xvg', T=298, skiptime=0, uncorr=None, @@ -184,7 +182,7 @@ def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, def update_units(self, units): - '''Update the plot and text output to the selected unit. + '''Update the unit. Parameters ---------- @@ -192,15 +190,6 @@ def update_units(self, units): The unit used for printing and plotting results. {'kcal/mol', 'kJ/mol', 'kT'} - Attributes - ---------- - scaling_factor : float - The scaling factor to change the unit from kT to the selected unit. - - Note - ---- - The internal representations are all in kT. This function only changes - the unit when outputting text file or plotting the results. ''' if units is not None: self.logger.info('Set unit to {}.'.format(units)) @@ -379,6 +368,10 @@ def generate_result(self): bonded 2.374144 0.014995 2.341631 0.005507 2.363828 0.021078 TOTAL 35.137291 0.103580 34.860619 0.087022 34.924618 0.119206 + Attributes + ---------- + summary : Dataframe + The summary of the free energy estimate. 
''' # Write estimate From 602b98bcea3786672a86f475326f6bfeaab3e118 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 30 Oct 2021 22:28:14 +0100 Subject: [PATCH 059/123] Update test_workflow_ABFE.py --- src/alchemlyb/tests/test_workflow_ABFE.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index cb42c2fd..d445ef0e 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -277,6 +277,11 @@ def test_convergence(self, workflow): os.remove('dF_t.pdf') assert len(workflow.convergence) == 10 + def test_single_estimator_mbar(self, workflow): + workflow.estimate(methods='ti') + summary = workflow.generate_result() + assert np.isclose(summary['TI']['Stages']['TOTAL'], 2.946, 0.1) + class Test_methods(): '''Test various methods.''' @@ -301,10 +306,20 @@ def test_uncorr_threshold(self, workflow): workflow.u_nk_list = original_u_nk workflow.dHdl_list = original_dHdl - def test_single_estimator(self, workflow): + def test_single_estimator_mbar(self, workflow): workflow.estimate(methods='mbar') assert len(workflow.estimator) == 1 assert 'mbar' in workflow.estimator + summary = workflow.generate_result() + assert np.isclose(summary['MBAR']['Stages']['TOTAL'], 2.946, 0.1) + + def test_single_estimator_ti(self, workflow): + u_nk_list = workflow.u_nk_list + delattr(workflow, 'u_nk_list') + workflow.estimate(methods='ti') + summary = workflow.generate_result() + assert np.isclose(summary['TI']['Stages']['TOTAL'], 2.946, 0.1) + workflow.u_nk_list = u_nk_list def test_bar_convergence(self, workflow): workflow.check_convergence(10, estimator='bar') From b8606ddcc8cf8573d33bb083090a1454307c5dfd Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Thu, 30 Dec 2021 15:58:40 +0000 Subject: [PATCH 060/123] Reobust gmx (#2) * updated CHANGES and docs for release 0.6.0 * support Python 3.10 (Keep Python 3.7 even though we could 
drop it as of two days ago.) * document convergence.convergence with summary table * add module links to docs * add link to GitHub Discussions forum - in docs - in README (update with the Getting Involved section from docs) * update Co-authored-by: Oliver Beckstein --- .github/workflows/ci.yaml | 2 +- CHANGES | 5 +- README.rst | 43 +++++++++---- docs/api_principles.rst | 30 +++------- docs/convergence.rst | 16 ++++- .../alchemlyb.convergence.convergence.rst | 12 ++++ docs/estimators.rst | 2 + docs/index.rst | 13 +++- docs/parsing.rst | 3 + docs/postprocessing.rst | 2 + docs/preprocessing.rst | 3 + docs/visualisation.rst | 2 + setup.py | 1 + src/alchemlyb/convergence/convergence.py | 13 ++-- src/alchemlyb/parsing/gmx.py | 11 +++- src/alchemlyb/tests/parsing/test_gmx.py | 60 +++++++++++++++++++ 16 files changed, 169 insertions(+), 49 deletions(-) create mode 100644 docs/convergence/alchemlyb.convergence.convergence.rst diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d80f06dd..5a5f10e1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -22,7 +22,7 @@ jobs: fail-fast: true matrix: os: ["ubuntu-latest", "macOS-latest", "windows-latest"] - python-version: ["3.7", "3.8", "3.9"] + python-version: ["3.7", "3.8", "3.9", "3.10"] steps: - uses: actions/checkout@v2 diff --git a/CHANGES b/CHANGES index 60d483c8..3c6c588d 100644 --- a/CHANGES +++ b/CHANGES @@ -13,13 +13,12 @@ The rules for this file: * release numbers follow "Semantic Versioning" https://semver.org ------------------------------------------------------------------------------ -??/??/2021 schlaicha, xiki-tempula, jhenin, ttjoseph, orbeckst +12/28/2021 schlaicha, xiki-tempula, jhenin, ttjoseph, orbeckst * 0.6.0 -Changes - Enhancements + - support Python 3.10 - support for Interleaved Double-Wide Sampling (IDWS) in NAMD (PR #135). 
Windows may be split across NAMD .fepout files, allowing for interrupted and restarted simulations, but each window must be complete (issue #145). diff --git a/README.rst b/README.rst index 51fca4b4..3ade4108 100644 --- a/README.rst +++ b/README.rst @@ -11,15 +11,41 @@ includes: molecular dynamics engines such as `GROMACS`_, `AMBER`_, `NAMD`_ and `other simulation codes`_. -2. Subsamplers for obtaining uncorrelated samples from timeseries data. +2. Subsamplers for obtaining uncorrelated samples from timeseries data + (including extracting independent, equilibrated samples + [Chodera2016]_ as implemented in the pymbar_ package). 3. Estimators for obtaining free energies directly from this data, using best-practices approaches for multistate Bennett acceptance ratio (MBAR) - [Shirts2008]_ and thermodynamic integration (TI). + [Shirts2008]_ and BAR (from pymbar_) and thermodynamic integration (TI). -In particular, it uses internally the excellent `pymbar -`_ library for performing MBAR and extracting -independent, equilibrated samples [Chodera2016]_. +.. _GROMACS: http://www.gromacs.org/ +.. _AMBER: http://ambermd.org/ +.. _NAMD: http://www.ks.uiuc.edu/Research/namd/ +.. _`other simulation codes`: https://alchemlyb.readthedocs.io/en/latest/parsing.html +.. _`pymbar`: http://pymbar.readthedocs.io/ + + +Getting involved +---------------- + +Contributions of all kinds are very welcome. + +If you have questions or want to discuss alchemlyb please post in the `alchemlyb Discussions`_. + +If you have bug reports or feature requests then please get in touch with us through the `Issue Tracker`_. + +We also welcome code contributions: have a look at our `Developer Guide`_. Open an issue with the proposed fix or change in the `Issue Tracker`_ and submit a pull request against the `alchemistry/alchemlyb`_ GitHub repository. + +.. _`alchemlyb Discussions`: https://github.com/alchemistry/alchemlyb/discussions +.. 
_`Developer Guide`: https://github.com/alchemistry/alchemlyb/wiki/Developer-Guide +.. _`Issue Tracker`: https://github.com/alchemistry/alchemlyb/issues +.. _`alchemistry/alchemlyb`: https://github.com/alchemistry/alchemlyb + + + +References +---------- .. [Shirts2008] Shirts, M.R., and Chodera, J.D. (2008). Statistically optimal analysis of samples from multiple equilibrium states. The Journal of Chemical @@ -29,14 +55,7 @@ independent, equilibrated samples [Chodera2016]_. Equilibration Detection in Molecular Simulations. Journal of Chemical Theory and Computation 12, 1799–1805. -.. _GROMACS: http://www.gromacs.org/ -.. _AMBER: http://ambermd.org/ - -.. _NAMD: http://www.ks.uiuc.edu/Research/namd/ - -.. _`other simulation codes`: https://alchemlyb.readthedocs.io/en/latest/parsing.html - .. |doi| image:: https://zenodo.org/badge/68669096.svg :alt: Zenodo DOI :scale: 100% diff --git a/docs/api_principles.rst b/docs/api_principles.rst index 8d912d08..7abbe7f7 100644 --- a/docs/api_principles.rst +++ b/docs/api_principles.rst @@ -52,7 +52,7 @@ The library is structured as follows, following a similar style to │   ├── mbar_.py │   ├── ti_.py │   └── ... - ├── convergence ### NOT IMPLEMENTED + ├── convergence │   ├── convergence.py │   └── ... ├── postprocessors @@ -64,30 +64,18 @@ The library is structured as follows, following a similar style to │   ├── mbar_matrix.py │   ├── ti_dhdl.py │   └── ... - └── workflows + └── workflows ### WORK IN PROGRESS └── ... -The :mod:`~alchemlyb.parsing` submodule contains parsers for individual MD engines, since the output files needed to perform alchemical free energy calculations vary widely and are not standardized. -Each module at the very least provides an `extract_u_nk` function for extracting reduced potentials (needed for MBAR), as well as an `extract_dHdl` function for extracting derivatives required for thermodynamic integration. 
-Other helper functions may be exposed for additional processing, such as generating an XVG file from an EDR file in the case of GROMACS. -All `extract\_*` functions take similar arguments (a file path, -parameters such as temperature), and produce standard outputs -(:class:`pandas.DataFrame` for reduced potentials, :class:`pandas.Series` for derivatives). +* The :mod:`~alchemlyb.parsing` submodule contains parsers for individual MD engines, since the output files needed to perform alchemical free energy calculations vary widely and are not standardized. Each module at the very least provides an `extract_u_nk` function for extracting reduced potentials (needed for MBAR), as well as an `extract_dHdl` function for extracting derivatives required for thermodynamic integration. Other helper functions may be exposed for additional processing, such as generating an XVG file from an EDR file in the case of GROMACS. All `extract\_*` functions take similar arguments (a file path, parameters such as temperature), and produce standard outputs (:class:`pandas.DataFrame` for reduced potentials, :class:`pandas.Series` for derivatives). +* The :mod:`~alchemlyb.preprocessing` submodule features functions for subsampling timeseries, as may be desired before feeding them to an estimator. So far, these are limited to `slicing`, `statistical_inefficiency`, and `equilibrium_detection` functions, many of which make use of subsampling schemes available from :mod:`pymbar`. These functions are written in such a way that they can be easily composed as parts of complex processing pipelines. +* The :mod:`~alchemlyb.estimators` module features classes *a la* **scikit-learn** that can be initialized with parameters that determine their behavior and then "trained" on a `fit` method. MBAR, BAR, and thermodynamic integration (TI) as the major methods are all implemented. Correct error estimates require the use of time series with independent samples. 
+* The :mod:`~alchemlyb.convergence` submodule features convenience functions/classes for doing convergence analysis using a given dataset and a chosen estimator. +* The :mod:`~alchemlyb.postprocessors` submodule contains functions to calculate new quantities or express data in different units. +* The :mod:`~alchemlyb.visualisation` submodule contains convenience plotting functions as known from, for example, `alchemical-analysis.py`_. +* The :mod:`~alchemlyb.workflows` submodule will contain complete analysis workflows that will serve as larger building blocks for complex analysis pipelines or a command line interface. -The :mod:`~alchemlyb.preprocessing` submodule features functions for subsampling timeseries, as may be desired before feeding them to an estimator. -So far, these are limited to `slicing`, `statistical_inefficiency`, and `equilibrium_detection` functions, many of which make use of subsampling schemes available from :mod:`pymbar`. -These functions are written in such a way that they can be easily composed as parts of complex processing pipelines. - -The :mod:`~alchemlyb.estimators` module features classes *a la* **scikit-learn** that can be initialized with parameters that determine their behavior and then "trained" on a `fit` method. -MBAR, BAR, and thermodynamic integration (TI) as the major methods are all implemented. -Correct error estimates require the use of time series with independent samples. - -The :mod:`~alchemlyb.convergence` submodule will feature convenience functions/classes for doing convergence analysis using a given dataset and a chosen estimator. - -The :mod:`~alchemlyb.postprocessing` submodule contains functions to calculate new quantities or express data in different units. - -The :mod:`~alchemlyb.visualization` submodule contains convenience plotting functions as known from, for example, `alchemical-analysis.py`_. 
All of these components lend themselves well to writing clear and flexible pipelines for processing data needed for alchemical free energy calculations, and furthermore allow for scaling up via libraries like `dask`_ or `joblib`_. diff --git a/docs/convergence.rst b/docs/convergence.rst index 14a1b480..55a4e270 100644 --- a/docs/convergence.rst +++ b/docs/convergence.rst @@ -1,3 +1,5 @@ +.. module:: alchemlyb.convergence + Using functions to estimate Convergence ======================================= @@ -31,4 +33,16 @@ Will give a plot looks like this A convergence plot of showing that the forward and backward has converged fully. -.. autofunction:: alchemlyb.convergence.forward_backward_convergence \ No newline at end of file + +Convergence functions +--------------------- + +The currently available connvergence functions: + +.. currentmodule:: alchemlyb.convergence + +.. autosummary:: + :toctree: convergence + + convergence + diff --git a/docs/convergence/alchemlyb.convergence.convergence.rst b/docs/convergence/alchemlyb.convergence.convergence.rst new file mode 100644 index 00000000..981c9b27 --- /dev/null +++ b/docs/convergence/alchemlyb.convergence.convergence.rst @@ -0,0 +1,12 @@ +Convergence analysis +==================== +.. automodule:: alchemlyb.convergence.convergence + +This module contains building blocks that perform a specific convergence analysis. They typically operate on lists of raw data and run estimators on these data sets. + + +API Reference +------------- +This submodule includes these convergence functions: + +.. autofunction:: alchemlyb.convergence.forward_backward_convergence diff --git a/docs/estimators.rst b/docs/estimators.rst index d1eceebb..af3476f1 100644 --- a/docs/estimators.rst +++ b/docs/estimators.rst @@ -1,3 +1,5 @@ +.. module:: alchemlyb.estimators + .. 
_estimators: Using estimators to obtain free energies diff --git a/docs/index.rst b/docs/index.rst index 9b7dfb5e..d873a083 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -54,10 +54,17 @@ The pandas_ package (one of our other primary dependencies) also follows `NEP 29 .. _contact: -Contributing ------------- -Contributions are very welcome. If you have bug reports or feature requests or questions then please get in touch with us through the `Issue Tracker`_. We also welcome code contributions: have a look at our `Developer Guide`_ and submit a pull request against the `alchemistry/alchemlyb`_ GitHub repository. +Getting involved +---------------- +Contributions of all kinds are very welcome. +If you have questions or want to discuss alchemlyb please post in the `alchemlyb Discussions`_. + +If you have bug reports or feature requests then please get in touch with us through the `Issue Tracker`_. + +We also welcome code contributions: have a look at our `Developer Guide`_. Open an issue with the proposed fix or change in the `Issue Tracker`_ and submit a pull request against the `alchemistry/alchemlyb`_ GitHub repository. + +.. _`alchemlyb Discussions`: https://github.com/alchemistry/alchemlyb/discussions .. _`Developer Guide`: https://github.com/alchemistry/alchemlyb/wiki/Developer-Guide .. _`Issue Tracker`: https://github.com/alchemistry/alchemlyb/issues .. _`alchemistry/alchemlyb`: https://github.com/alchemistry/alchemlyb diff --git a/docs/parsing.rst b/docs/parsing.rst index 4af2d65b..c3a4181c 100644 --- a/docs/parsing.rst +++ b/docs/parsing.rst @@ -1,3 +1,6 @@ +.. module:: alchemlyb.parsing + + Parsing data files ================== **alchemlyb** features parsing submodules for getting raw data from different software packages into common data structures that can be used directly by its :ref:`subsamplers ` and :ref:`estimators `. 
diff --git a/docs/postprocessing.rst b/docs/postprocessing.rst index d7451108..e6e4dc80 100644 --- a/docs/postprocessing.rst +++ b/docs/postprocessing.rst @@ -1,3 +1,5 @@ +.. module:: alchemlyb.postprocessors + .. _postprocessing: Tools for postprocessing diff --git a/docs/preprocessing.rst b/docs/preprocessing.rst index 44ba8baa..dcd2ad19 100644 --- a/docs/preprocessing.rst +++ b/docs/preprocessing.rst @@ -1,3 +1,6 @@ +.. module:: alchemlyb.preprocessing + + Preprocessing datasets ====================== It is often the case that some initial pre-processing of raw datasets are desirable before feeding these to an estimator. diff --git a/docs/visualisation.rst b/docs/visualisation.rst index 2390cc63..e25e1266 100644 --- a/docs/visualisation.rst +++ b/docs/visualisation.rst @@ -1,3 +1,5 @@ +.. module:: alchemlyb.visualisation + Visualisation of the results ============================ It is quite often that the user want to visualise the results to gain diff --git a/setup.py b/setup.py index d9930f6d..81f19a13 100755 --- a/setup.py +++ b/setup.py @@ -31,6 +31,7 @@ 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', 'Topic :: Scientific/Engineering', 'Topic :: Scientific/Engineering :: Bio-Informatics', 'Topic :: Scientific/Engineering :: Chemistry', diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index b126f47c..cc20204d 100644 --- a/src/alchemlyb/convergence/convergence.py +++ b/src/alchemlyb/convergence/convergence.py @@ -8,13 +8,13 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): - ''' The forward and backward convergence of the free energy estimate. + '''Forward and backward convergence of the free energy estimate. - Generate the free energy estimate as a function of time in both - directions, with the specified number of points in the time. 
For example, - setting `num` to 10 would give the forward convergence which is the free - energy estimate from the first 10%, 20%, 30% .. of the data. The - Backward would give the estimate from the last 10%, 20%, 30% .. of the + Generate the free energy estimate as a function of time in both directions, + with the specified number of equally spaced points in the time. For + example, setting `num` to 10 would give the forward convergence which is + the free energy estimate from the first 10%, 20%, 30%, ... of the data. The + Backward would give the estimate from the last 10%, 20%, 30%, ... of the data. Parameters @@ -45,6 +45,7 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): .. versionadded:: 0.6.0 + ''' logger = logging.getLogger('alchemlyb.convergence.' 'forward_backward_convergence') diff --git a/src/alchemlyb/parsing/gmx.py b/src/alchemlyb/parsing/gmx.py index 3e3678b6..7b9671a6 100644 --- a/src/alchemlyb/parsing/gmx.py +++ b/src/alchemlyb/parsing/gmx.py @@ -305,8 +305,15 @@ def _extract_dataframe(xvg, headers=None): header_cnt = len(headers['_raw_lines']) df = pd.read_csv(xvg, sep=r"\s+", header=None, skiprows=header_cnt, - na_filter=True, memory_map=True, names=cols, dtype=np.float64, - float_precision='high') + memory_map=True, on_bad_lines='skip') + # If names=cols is passed to read_csv, rows with more than the + # designated columns will be truncated and used instead of discarded. + df.rename(columns={i: name for i, name in enumerate(cols)}, inplace=True) + # If dtype=np.float64 and float_precision='high' are passed to read_csv, + # 12.345.56 and - cannot be read. 
+ df = df.apply(pd.to_numeric, errors='coerce') + # drop duplicate + df.dropna(inplace=True) # drop duplicated columns (see PR #86 https://github.com/alchemistry/alchemlyb/pull/86/) df = df[df.columns[~df.columns.str.endswith("[duplicated]")]] diff --git a/src/alchemlyb/tests/parsing/test_gmx.py b/src/alchemlyb/tests/parsing/test_gmx.py index 063aafe1..e3bb752d 100644 --- a/src/alchemlyb/tests/parsing/test_gmx.py +++ b/src/alchemlyb/tests/parsing/test_gmx.py @@ -2,6 +2,9 @@ """ +import bz2 +import pytest + from alchemlyb.parsing.gmx import extract_dHdl, extract_u_nk from alchemtest.gmx import load_benzene from alchemtest.gmx import load_expanded_ensemble_case_1, load_expanded_ensemble_case_2, load_expanded_ensemble_case_3 @@ -195,3 +198,60 @@ def test_extract_dHdl_unit(): dhdl = extract_dHdl(dataset['data']['Coulomb'][0], 310) assert dhdl.attrs['temperature'] == 310 assert dhdl.attrs['energy_unit'] == 'kT' + +class TestRobustGMX(): + '''Test dropping the row that is wrong in different way''' + @staticmethod + @pytest.fixture(scope='class') + def data(): + dhdl = extract_dHdl(load_benzene()['data']['Coulomb'][0], 310) + with bz2.open(load_benzene()['data']['Coulomb'][0], "rt") as bz_file: + text = bz_file.read() + return text, len(dhdl) + + def test_sanity(self, data, tmpdir): + '''Test if the test routine is working.''' + text, length = data + new_text = tmpdir.join('text.xvg') + new_text.write(text) + dhdl = extract_dHdl(tmpdir.join('text.xvg'), 310) + assert len(dhdl) == length + + def test_truncated_row(self, data, tmpdir): + '''Test the case where the last row has been truncated.''' + text, length = data + new_text = tmpdir.join('text.xvg') + new_text.write(text + '40010.0 27.0\n') + dhdl = extract_dHdl(tmpdir.join('text.xvg'), 310) + assert len(dhdl) == length + + def test_truncated_number(self, data, tmpdir): + '''Test the case where the last row has been truncated and a - has + been left.''' + text, length = data + new_text = tmpdir.join('text.xvg') + 
new_text.write(text + '40010.0 27.0 -\n') + dhdl = extract_dHdl(tmpdir.join('text.xvg'), 310) + assert len(dhdl) == length + + def test_weirdnumber(self, data, tmpdir): + '''Test the case where the last number has been appended a weird + number.''' + text, length = data + new_text = tmpdir.join('text.xvg') + # Note the 27.040010.0 which is the sum of 27.0 and 40010.0 + new_text.write(text + '40010.0 27.040010.0 27.0 0.0 6.7 13.5 20.2 27.0 0.7 27.0 0.0 6.7 ' + '13.5 20.2 27.0 0.7\n') + dhdl = extract_dHdl(tmpdir.join('text.xvg'), 310) + assert len(dhdl) == length + + def test_too_many_cols(self, data, tmpdir): + '''Test the case where the row has too many columns.''' + text, length = data + new_text = tmpdir.join('text.xvg') + new_text.write(text + + '40010.0 27.0 0.0 6.7 13.5 20.2 27.0 0.7 27.0 0.0 6.7 13.5 20.2 27.0 0.7\n') + dhdl = extract_dHdl(tmpdir.join('text.xvg'), 310) + assert len(dhdl) == length + + From bbd281dba79ce0baa1c4180681fa0a57f5782de1 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 5 Mar 2022 21:07:06 +0000 Subject: [PATCH 061/123] update --- docs/index.rst | 1 + docs/workflows.rst | 16 +++ docs/workflows/alchemlyb.workflows.base.rst | 27 +++++ src/alchemlyb/workflows/__init__.py | 3 + src/alchemlyb/workflows/base.py | 127 ++++++++++++++++++++ 5 files changed, 174 insertions(+) create mode 100644 docs/workflows.rst create mode 100644 docs/workflows/alchemlyb.workflows.base.rst create mode 100644 src/alchemlyb/workflows/__init__.py create mode 100644 src/alchemlyb/workflows/base.py diff --git a/docs/index.rst b/docs/index.rst index 6d42c6a3..0f30b02a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -80,6 +80,7 @@ We also welcome code contributions: have a look at our `Developer Guide`_. Open convergence postprocessing visualisation + workflows .. 
toctree:: :maxdepth: 1 diff --git a/docs/workflows.rst b/docs/workflows.rst new file mode 100644 index 00000000..3296edae --- /dev/null +++ b/docs/workflows.rst @@ -0,0 +1,16 @@ +Automatic workflow +================== +Though **alchemlyb** is a library offering great flexibility in deriving free +energy estimate, it also provides workflows that provides automatic analysis +of the results and step-by-step version that allows more flexibility. + +For developers, the skeleton of the workflow should follow the example in +:class:`alchemlyb.workflows.base.WorkflowBase`. + +.. currentmodule:: alchemlyb.workflows + +.. autosummary:: + :toctree: workflows + + base + diff --git a/docs/workflows/alchemlyb.workflows.base.rst b/docs/workflows/alchemlyb.workflows.base.rst new file mode 100644 index 00000000..849c210e --- /dev/null +++ b/docs/workflows/alchemlyb.workflows.base.rst @@ -0,0 +1,27 @@ +The base workflow +================= + +The :class:`alchemlyb.workflows.base.WorkflowBase` provides a basic API +template for the workflow development. +The workflow should be able to run in an automatic fashion. :: + + >>> from alchemlyb.workflows.base import WorkflowBase + >>> workflow = WorkflowBase() + >>> workflow.run() + +Three main functions are provided such that the workflow could be run in a +step-by-step fashion. :: + + >>> from alchemlyb.workflows.base import WorkflowBase + >>> workflow = WorkflowBase() + >>> workflow.read() + >>> workflow.subsample() + >>> workflow.estimate() + >>> workflow.convergence() + >>> workflow.plot() + +API Reference +------------- +.. 
autoclass:: alchemlyb.workflows.base.WorkflowBase + :members: + :inherited-members: diff --git a/src/alchemlyb/workflows/__init__.py b/src/alchemlyb/workflows/__init__.py new file mode 100644 index 00000000..5823a467 --- /dev/null +++ b/src/alchemlyb/workflows/__init__.py @@ -0,0 +1,3 @@ +__all__ = [ + 'base', +] diff --git a/src/alchemlyb/workflows/base.py b/src/alchemlyb/workflows/base.py new file mode 100644 index 00000000..a8a570cc --- /dev/null +++ b/src/alchemlyb/workflows/base.py @@ -0,0 +1,127 @@ +import pandas as pd + +class WorkflowBase(): + """The base class for the Workflow. + + Parameters + ---------- + + units : string, optional + The unit used for printing and plotting results. {'kcal/mol', 'kJ/mol', + 'kT'}. Default: 'kT'. + + software : string, optional + The software used for generating input. {'Gromacs', } + + T : float, optional, + Temperature in K. Default: 298. + + out : string, optional + Directory in which the output files produced by this script will be + stored. Default: './'. + + Attributes + ---------- + + file_list : list + A list of files to be read by the parser. + + """ + def __init__(self, units='kT', software='Gromacs', T=298, out='./', *args, + **kwargs): + + self.T = T + self.software = software + self.unit = units + self.file_list = [] + self.out = out + + def run(self, *args, **kwargs): + """ Run the flow in an automatic fashion. + + Attributes + ---------- + + u_nk_list : list + A list of :class:`pandas.DataFrame` of u_nk. + dHdl_list : list + A list of :class:`pandas.DataFrame` of dHdl. + u_nk_sample_list : list + A list of :class:`pandas.DataFrame` of the subsampled u_nk. + dHdl_sample_list : list + A list of :class:`pandas.DataFrame` of the subsampled dHdl. + result : pandas.Dataframe + The main result of the workflow. + convergence : pandas.Dataframe + The result of the convergence analysis. 
+ + """ + self.u_nk_list = [] + self.dHdl_list = [] + self.dHdl_sample_list = [] + self.u_nk_sample_list = [] + self.result = pd.DataFrame() + self.convergence = pd.DataFrame() + + def read(self, *args, **kwargs): + """ The function that reads the files in `file_list` and parse them + into u_nk and dHdl files. + + Attributes + ---------- + + u_nk_list : list + A list of :class:`pandas.DataFrame` of u_nk. + dHdl_list : list + A list of :class:`pandas.DataFrame` of dHdl. + + """ + self.u_nk_list = [] + self.dHdl_list = [] + + def subsample(self, *args, **kwargs): + """ The function that subsample the u_nk and dHdl in `u_nk_list` and + `dHdl_list`. + + Attributes + ---------- + + u_nk_sample_list : list + A list of :class:`pandas.DataFrame` of the subsampled u_nk. + dHdl_sample_list : list + A list of :class:`pandas.DataFrame` of the subsampled dHdl. + + """ + self.dHdl_sample_list = [] + self.u_nk_sample_list = [] + + def estimate(self, *args, **kwargs): + """ The function that runs the estimator based on `u_nk_sample_list` + and `dHdl_sample_list`. + + Attributes + ---------- + + result : pandas.Dataframe + The main result of the workflow. + + """ + self.result = pd.DataFrame() + + def convergence(self, *args, **kwargs): + """ The function for doing convergence analysis. + + Attributes + ---------- + + convergence : pandas.Dataframe + The result of the convergence analysis. + + """ + self.convergence = pd.DataFrame() + + def plot(self, *args, **kwargs): + """ The function for producing any plots. + + """ + ... 
From 1c40e979dec632c0c29612ac3bd28ec2851ad2aa Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 6 Mar 2022 10:41:20 +0000 Subject: [PATCH 062/123] test --- src/alchemlyb/tests/test_workflow.py | 32 ++++++++++++++++++++++++++++ src/alchemlyb/workflows/base.py | 11 +++++----- 2 files changed, 37 insertions(+), 6 deletions(-) create mode 100644 src/alchemlyb/tests/test_workflow.py diff --git a/src/alchemlyb/tests/test_workflow.py b/src/alchemlyb/tests/test_workflow.py new file mode 100644 index 00000000..fb6148be --- /dev/null +++ b/src/alchemlyb/tests/test_workflow.py @@ -0,0 +1,32 @@ +import pytest +from alchemlyb.workflows import base +import pandas as pd +from unittest.mock import patch +import os + +class Test_automatic_base(): + @staticmethod + @pytest.fixture(scope='class') + def workflow(): + workflow = base.WorkflowBase() + workflow.run() + yield workflow + + # def test_write(self, workflow, tmpdir): + # '''Patch the output directory to tmpdir''' + # with patch('workflow.out', tmpdir.strpath): + # workflow.result.to_pickle(os.path.join(workflow.out, 'result.pkl')) + + def test_read(self, workflow): + assert len(workflow.u_nk_list) == 0 + assert len(workflow.dHdl_list) == 0 + + def test_subsample(self, workflow): + assert len(workflow.u_nk_sample_list) == 0 + assert len(workflow.dHdl_sample_list) == 0 + + def test_estimator(self, workflow): + assert isinstance(workflow.result, pd.DataFrame) + + def test_convergence(self, workflow): + assert isinstance(workflow.convergence, pd.DataFrame) diff --git a/src/alchemlyb/workflows/base.py b/src/alchemlyb/workflows/base.py index a8a570cc..57d347f1 100644 --- a/src/alchemlyb/workflows/base.py +++ b/src/alchemlyb/workflows/base.py @@ -56,12 +56,11 @@ def run(self, *args, **kwargs): The result of the convergence analysis. 
""" - self.u_nk_list = [] - self.dHdl_list = [] - self.dHdl_sample_list = [] - self.u_nk_sample_list = [] - self.result = pd.DataFrame() - self.convergence = pd.DataFrame() + self.read() + self.subsample() + self.estimate() + self.convergence() + self.plot() def read(self, *args, **kwargs): """ The function that reads the files in `file_list` and parse them From aecac7eaa0bacb03044bfac8ea0df98aa54077c0 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 13 Mar 2022 10:10:51 +0000 Subject: [PATCH 063/123] Update test_workflow.py --- src/alchemlyb/tests/test_workflow.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow.py b/src/alchemlyb/tests/test_workflow.py index fb6148be..94e61cd2 100644 --- a/src/alchemlyb/tests/test_workflow.py +++ b/src/alchemlyb/tests/test_workflow.py @@ -1,21 +1,21 @@ import pytest from alchemlyb.workflows import base import pandas as pd -from unittest.mock import patch import os class Test_automatic_base(): @staticmethod - @pytest.fixture(scope='class') - def workflow(): - workflow = base.WorkflowBase() + @pytest.fixture(scope='session') + def workflow(tmp_path_factory): + outdir = tmp_path_factory.mktemp("out") + workflow = base.WorkflowBase(out=str(outdir)) workflow.run() - yield workflow + return workflow - # def test_write(self, workflow, tmpdir): - # '''Patch the output directory to tmpdir''' - # with patch('workflow.out', tmpdir.strpath): - # workflow.result.to_pickle(os.path.join(workflow.out, 'result.pkl')) + def test_write(self, workflow, tmpdir): + '''Patch the output directory to tmpdir''' + workflow.result.to_pickle(os.path.join(workflow.out, 'result.pkl')) + assert os.path.exists(os.path.join(workflow.out, 'result.pkl')) def test_read(self, workflow): assert len(workflow.u_nk_list) == 0 From afb22f5c641c0e589541fa52c9c9954fae9cabfc Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 13 Mar 2022 11:46:34 +0000 Subject: [PATCH 064/123] Base (#3) merge base --- 
docs/index.rst | 2 +- docs/workflows.rst | 16 +++ docs/workflows/alchemlyb.workflows.base.rst | 27 +++++ src/alchemlyb/tests/test_workflow.py | 32 +++++ src/alchemlyb/workflows/__init__.py | 5 +- src/alchemlyb/workflows/base.py | 126 ++++++++++++++++++++ 6 files changed, 206 insertions(+), 2 deletions(-) create mode 100644 docs/workflows.rst create mode 100644 docs/workflows/alchemlyb.workflows.base.rst create mode 100644 src/alchemlyb/tests/test_workflow.py create mode 100644 src/alchemlyb/workflows/base.py diff --git a/docs/index.rst b/docs/index.rst index d873a083..0f30b02a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -80,7 +80,7 @@ We also welcome code contributions: have a look at our `Developer Guide`_. Open convergence postprocessing visualisation - workflow + workflows .. toctree:: :maxdepth: 1 diff --git a/docs/workflows.rst b/docs/workflows.rst new file mode 100644 index 00000000..3296edae --- /dev/null +++ b/docs/workflows.rst @@ -0,0 +1,16 @@ +Automatic workflow +================== +Though **alchemlyb** is a library offering great flexibility in deriving free +energy estimate, it also provides workflows that provides automatic analysis +of the results and step-by-step version that allows more flexibility. + +For developers, the skeleton of the workflow should follow the example in +:class:`alchemlyb.workflows.base.WorkflowBase`. + +.. currentmodule:: alchemlyb.workflows + +.. autosummary:: + :toctree: workflows + + base + diff --git a/docs/workflows/alchemlyb.workflows.base.rst b/docs/workflows/alchemlyb.workflows.base.rst new file mode 100644 index 00000000..849c210e --- /dev/null +++ b/docs/workflows/alchemlyb.workflows.base.rst @@ -0,0 +1,27 @@ +The base workflow +================= + +The :class:`alchemlyb.workflows.base.WorkflowBase` provides a basic API +template for the workflow development. +The workflow should be able to run in an automatic fashion. 
:: + + >>> from alchemlyb.workflows.base import WorkflowBase + >>> workflow = WorkflowBase() + >>> workflow.run() + +Three main functions are provided such that the workflow could be run in a +step-by-step fashion. :: + + >>> from alchemlyb.workflows.base import WorkflowBase + >>> workflow = WorkflowBase() + >>> workflow.read() + >>> workflow.subsample() + >>> workflow.estimate() + >>> workflow.convergence() + >>> workflow.plot() + +API Reference +------------- +.. autoclass:: alchemlyb.workflows.base.WorkflowBase + :members: + :inherited-members: diff --git a/src/alchemlyb/tests/test_workflow.py b/src/alchemlyb/tests/test_workflow.py new file mode 100644 index 00000000..94e61cd2 --- /dev/null +++ b/src/alchemlyb/tests/test_workflow.py @@ -0,0 +1,32 @@ +import pytest +from alchemlyb.workflows import base +import pandas as pd +import os + +class Test_automatic_base(): + @staticmethod + @pytest.fixture(scope='session') + def workflow(tmp_path_factory): + outdir = tmp_path_factory.mktemp("out") + workflow = base.WorkflowBase(out=str(outdir)) + workflow.run() + return workflow + + def test_write(self, workflow, tmpdir): + '''Patch the output directory to tmpdir''' + workflow.result.to_pickle(os.path.join(workflow.out, 'result.pkl')) + assert os.path.exists(os.path.join(workflow.out, 'result.pkl')) + + def test_read(self, workflow): + assert len(workflow.u_nk_list) == 0 + assert len(workflow.dHdl_list) == 0 + + def test_subsample(self, workflow): + assert len(workflow.u_nk_sample_list) == 0 + assert len(workflow.dHdl_sample_list) == 0 + + def test_estimator(self, workflow): + assert isinstance(workflow.result, pd.DataFrame) + + def test_convergence(self, workflow): + assert isinstance(workflow.convergence, pd.DataFrame) diff --git a/src/alchemlyb/workflows/__init__.py b/src/alchemlyb/workflows/__init__.py index 24e6ab7d..9e81a9e7 100644 --- a/src/alchemlyb/workflows/__init__.py +++ b/src/alchemlyb/workflows/__init__.py @@ -1 +1,4 @@ -from .abfe import ABFE +__all__ = [ 
+ 'base', + 'abfe'. +] diff --git a/src/alchemlyb/workflows/base.py b/src/alchemlyb/workflows/base.py new file mode 100644 index 00000000..57d347f1 --- /dev/null +++ b/src/alchemlyb/workflows/base.py @@ -0,0 +1,126 @@ +import pandas as pd + +class WorkflowBase(): + """The base class for the Workflow. + + Parameters + ---------- + + units : string, optional + The unit used for printing and plotting results. {'kcal/mol', 'kJ/mol', + 'kT'}. Default: 'kT'. + + software : string, optional + The software used for generating input. {'Gromacs', } + + T : float, optional, + Temperature in K. Default: 298. + + out : string, optional + Directory in which the output files produced by this script will be + stored. Default: './'. + + Attributes + ---------- + + file_list : list + A list of files to be read by the parser. + + """ + def __init__(self, units='kT', software='Gromacs', T=298, out='./', *args, + **kwargs): + + self.T = T + self.software = software + self.unit = units + self.file_list = [] + self.out = out + + def run(self, *args, **kwargs): + """ Run the flow in an automatic fashion. + + Attributes + ---------- + + u_nk_list : list + A list of :class:`pandas.DataFrame` of u_nk. + dHdl_list : list + A list of :class:`pandas.DataFrame` of dHdl. + u_nk_sample_list : list + A list of :class:`pandas.DataFrame` of the subsampled u_nk. + dHdl_sample_list : list + A list of :class:`pandas.DataFrame` of the subsampled dHdl. + result : pandas.Dataframe + The main result of the workflow. + convergence : pandas.Dataframe + The result of the convergence analysis. + + """ + self.read() + self.subsample() + self.estimate() + self.convergence() + self.plot() + + def read(self, *args, **kwargs): + """ The function that reads the files in `file_list` and parse them + into u_nk and dHdl files. + + Attributes + ---------- + + u_nk_list : list + A list of :class:`pandas.DataFrame` of u_nk. + dHdl_list : list + A list of :class:`pandas.DataFrame` of dHdl. 
+ + """ + self.u_nk_list = [] + self.dHdl_list = [] + + def subsample(self, *args, **kwargs): + """ The function that subsample the u_nk and dHdl in `u_nk_list` and + `dHdl_list`. + + Attributes + ---------- + + u_nk_sample_list : list + A list of :class:`pandas.DataFrame` of the subsampled u_nk. + dHdl_sample_list : list + A list of :class:`pandas.DataFrame` of the subsampled dHdl. + + """ + self.dHdl_sample_list = [] + self.u_nk_sample_list = [] + + def estimate(self, *args, **kwargs): + """ The function that runs the estimator based on `u_nk_sample_list` + and `dHdl_sample_list`. + + Attributes + ---------- + + result : pandas.Dataframe + The main result of the workflow. + + """ + self.result = pd.DataFrame() + + def convergence(self, *args, **kwargs): + """ The function for doing convergence analysis. + + Attributes + ---------- + + convergence : pandas.Dataframe + The result of the convergence analysis. + + """ + self.convergence = pd.DataFrame() + + def plot(self, *args, **kwargs): + """ The function for producing any plots. + + """ + ... From 09efd7723bdb8afbded02fd30443e55f9fe3f406 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 13 Mar 2022 11:49:01 +0000 Subject: [PATCH 065/123] Revert "Merge branch 'workf' into base" This reverts commit 672a41af66fac3e2b1ead6c6a0c679486a4e070a, reversing changes made to aecac7eaa0bacb03044bfac8ea0df98aa54077c0. 
--- docs/workflow.rst | 134 ----- src/alchemlyb/convergence/convergence.py | 3 +- src/alchemlyb/parsing/gmx.py | 11 +- src/alchemlyb/tests/parsing/test_gmx.py | 60 -- src/alchemlyb/tests/test_workflow_ABFE.py | 333 ----------- src/alchemlyb/workflows/__init__.py | 1 - src/alchemlyb/workflows/abfe.py | 651 ---------------------- 7 files changed, 3 insertions(+), 1190 deletions(-) delete mode 100644 docs/workflow.rst delete mode 100644 src/alchemlyb/tests/test_workflow_ABFE.py delete mode 100644 src/alchemlyb/workflows/abfe.py diff --git a/docs/workflow.rst b/docs/workflow.rst deleted file mode 100644 index dfc514d6..00000000 --- a/docs/workflow.rst +++ /dev/null @@ -1,134 +0,0 @@ -Automatic workflow -================== -Though **alchemlyb** is a library offering great flexibility in deriving free -energy estimate, it also provide a easy pipeline that is similar to -`Alchemical Analysis `_ and a -step-by-step version that allows more flexibility. - -Note ----- -This is an experimental feature and is not API stable. - -Fully Automatic analysis ------------------------- -A interface similar to -`Alchemical Analysis `_ -could be excuted with a single line of command. 
:: - - >>> import os - >>> from alchemtest.gmx import load_ABFE - >>> from alchemlyb.workflows import ABFE - >>> # Obtain the path of the data - >>> dir = os.path.dirname(load_ABFE()['data']['complex'][0]) - >>> print(dir) - 'alchemtest/gmx/ABFE/complex' - >>> workflow = ABFE(units='kcal/mol', software='Gromacs', dir=dir, - >>> prefix='dhdl', suffix='xvg', T=298, skiptime=10, - >>> uncorr='dhdl', threshold=50, - >>> methods=('mbar', 'bar', 'ti'), out='./', - >>> overlap='O_MBAR.pdf', - >>> breakdown=True, forwrev=10, log='result.log') - -This would give the free energy estimate using all of -:class:`~alchemlyb.estimators.TI`, :class:`~alchemlyb.estimators.BAR`, -:class:`~alchemlyb.estimators.MBAR` and the result will be given as -:class:`pandas.DataFrame` to :attr:`alchemlyb.workflows.ABFE.summary` :: - - MBAR MBAR_Error BAR BAR_Error TI TI_Error - States 0 -- 1 0.065967 0.001293 0.066544 0.001661 0.066663 0.001675 - 1 -- 2 0.089774 0.001398 0.089303 0.002101 0.089566 0.002144 - 2 -- 3 0.132036 0.001638 0.132687 0.002990 0.133292 0.003055 - 3 -- 4 0.116494 0.001213 0.116348 0.002691 0.116845 0.002750 - 4 -- 5 0.105251 0.000980 0.106344 0.002337 0.106603 0.002362 - 5 -- 6 0.349320 0.002781 0.343399 0.006839 0.350568 0.007393 - 6 -- 7 0.402346 0.002767 0.391368 0.006641 0.395754 0.006961 - 7 -- 8 0.322284 0.002058 0.319395 0.005333 0.321542 0.005434 - 8 -- 9 0.434999 0.002683 0.425680 0.006823 0.430251 0.007155 - 9 -- 10 0.355672 0.002219 0.350564 0.005472 0.352745 0.005591 - 10 -- 11 3.574227 0.008744 3.513595 0.018711 3.514790 0.018078 - 11 -- 12 2.896685 0.009905 2.821760 0.017844 2.823210 0.018088 - 12 -- 13 2.223769 0.011229 2.188885 0.018438 2.189784 0.018478 - 13 -- 14 1.520978 0.012526 1.493598 0.019155 1.490070 0.019288 - 14 -- 15 0.911279 0.009527 0.894878 0.015023 0.896010 0.015140 - 15 -- 16 0.892365 0.010558 0.886706 0.015260 0.884698 0.015392 - 16 -- 17 1.737971 0.025315 1.720643 0.031416 1.741028 0.030624 - 17 -- 18 1.790706 0.025560 1.788112 0.029435 
1.801695 0.029244 - 18 -- 19 1.998635 0.023340 2.007404 0.027447 2.019213 0.027096 - 19 -- 20 2.263475 0.020286 2.265322 0.025023 2.282040 0.024566 - 20 -- 21 2.565680 0.016695 2.561324 0.023611 2.552977 0.023753 - 21 -- 22 1.384094 0.007553 1.385837 0.011672 1.381999 0.011991 - 22 -- 23 1.428567 0.007504 1.422689 0.012524 1.416010 0.013012 - 23 -- 24 1.440581 0.008059 1.412517 0.013125 1.408267 0.013539 - 24 -- 25 1.411329 0.009022 1.419167 0.013356 1.411446 0.013795 - 25 -- 26 1.340320 0.010167 1.360679 0.015213 1.356953 0.015260 - 26 -- 27 1.243745 0.011239 1.245873 0.015711 1.248959 0.015762 - 27 -- 28 1.128429 0.012859 1.124554 0.016999 1.121892 0.016962 - 28 -- 29 1.010313 0.016442 1.005444 0.017692 1.019747 0.017257 - Stages coul 10.215658 0.033903 10.017838 0.041839 10.017854 0.048744 - vdw 22.547489 0.098699 22.501150 0.060092 22.542936 0.106723 - bonded 2.374144 0.014995 2.341631 0.005507 2.363828 0.021078 - TOTAL 35.137291 0.103580 34.860619 0.087022 34.924618 0.119206 - -The :ref:`overlay matrix for the MBAR estimator ` will be -plotted and saved to `O_MBAR.pdf`. - -The :ref:`dHdl for TI ` will be plotted to `dhdl_TI.pdf`. - -The :ref:`dF states ` will be plotted to `dF_state.pdf` in -portrait model and `dF_state_long.pdf` in landscape model. - -The forward and backward convergence will be plotted to `dF_t.pdf` using -:class:`~alchemlyb.estimators.MBAR`. - -.. currentmodule:: alchemlyb.workflows - -.. autoclass:: ABFE - :noindex: - -Semi-automatic analysis ------------------------ -The same analysis could also performed in steps allowing access and modification -to the data generated at each stage of the analysis. 
:: - - >>> import os - >>> from alchemtest.gmx import load_ABFE - >>> from alchemlyb.workflows import ABFE - >>> # Obtain the path of the data - >>> dir = os.path.dirname(load_ABFE()['data']['complex'][0]) - >>> print(dir) - 'alchemtest/gmx/ABFE/complex' - >>> # Load the data - >>> workflow = ABFE(software='Gromacs', dir=dir, - >>> prefix='dhdl', suffix='xvg', T=298, out='./', - >>> log='result.log') - >>> # Set the unit. - >>> workflow.update_units('kcal/mol') - >>> # Decorrelate the data. - >>> workflow.preprocess(skiptime=10, uncorr='dhdl', threshold=50) - >>> # Run the estimator - >>> workflow.estimate(methods=('mbar', 'bar', 'ti')) - >>> # Generate the results - >>> summary = workflow.generate_result() - >>> # Plot the overlap matrix - >>> workflow.plot_overlap_matrix(overlap='O_MBAR.pdf') - >>> # Plot the dHdl for TI - >>> workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') - >>> # Plot the dF states - >>> workflow.plot_dF_state(dF_state='dF_state.pdf') - >>> # Convergence analysis - >>> workflow.check_convergence(10, dF_t='dF_t.pdf') - - - -.. currentmodule:: alchemlyb.workflows.ABFE - -.. autofunction:: update_units -.. autofunction:: preprocess -.. autofunction:: estimate -.. autofunction:: generate_result -.. autofunction:: plot_overlap_matrix -.. autofunction:: plot_ti_dhdl -.. autofunction:: plot_dF_state -.. autofunction:: check_convergence - -.. _Alchemical Analysis: https://github.com/MobleyLab/alchemical-analysis \ No newline at end of file diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index cc20204d..e626f2dc 100644 --- a/src/alchemlyb/convergence/convergence.py +++ b/src/alchemlyb/convergence/convergence.py @@ -2,8 +2,7 @@ import logging import numpy as np -from ..estimators import BAR, TI -from ..estimators import AutoMBAR as MBAR +from ..estimators import MBAR, BAR, TI from .. 
import concat diff --git a/src/alchemlyb/parsing/gmx.py b/src/alchemlyb/parsing/gmx.py index 7b9671a6..3e3678b6 100644 --- a/src/alchemlyb/parsing/gmx.py +++ b/src/alchemlyb/parsing/gmx.py @@ -305,15 +305,8 @@ def _extract_dataframe(xvg, headers=None): header_cnt = len(headers['_raw_lines']) df = pd.read_csv(xvg, sep=r"\s+", header=None, skiprows=header_cnt, - memory_map=True, on_bad_lines='skip') - # If names=cols is passed to read_csv, rows with more than the - # designated columns will be truncated and used instead of discarded. - df.rename(columns={i: name for i, name in enumerate(cols)}, inplace=True) - # If dtype=np.float64 and float_precision='high' are passed to read_csv, - # 12.345.56 and - cannot be read. - df = df.apply(pd.to_numeric, errors='coerce') - # drop duplicate - df.dropna(inplace=True) + na_filter=True, memory_map=True, names=cols, dtype=np.float64, + float_precision='high') # drop duplicated columns (see PR #86 https://github.com/alchemistry/alchemlyb/pull/86/) df = df[df.columns[~df.columns.str.endswith("[duplicated]")]] diff --git a/src/alchemlyb/tests/parsing/test_gmx.py b/src/alchemlyb/tests/parsing/test_gmx.py index e3bb752d..063aafe1 100644 --- a/src/alchemlyb/tests/parsing/test_gmx.py +++ b/src/alchemlyb/tests/parsing/test_gmx.py @@ -2,9 +2,6 @@ """ -import bz2 -import pytest - from alchemlyb.parsing.gmx import extract_dHdl, extract_u_nk from alchemtest.gmx import load_benzene from alchemtest.gmx import load_expanded_ensemble_case_1, load_expanded_ensemble_case_2, load_expanded_ensemble_case_3 @@ -198,60 +195,3 @@ def test_extract_dHdl_unit(): dhdl = extract_dHdl(dataset['data']['Coulomb'][0], 310) assert dhdl.attrs['temperature'] == 310 assert dhdl.attrs['energy_unit'] == 'kT' - -class TestRobustGMX(): - '''Test dropping the row that is wrong in different way''' - @staticmethod - @pytest.fixture(scope='class') - def data(): - dhdl = extract_dHdl(load_benzene()['data']['Coulomb'][0], 310) - with 
bz2.open(load_benzene()['data']['Coulomb'][0], "rt") as bz_file: - text = bz_file.read() - return text, len(dhdl) - - def test_sanity(self, data, tmpdir): - '''Test if the test routine is working.''' - text, length = data - new_text = tmpdir.join('text.xvg') - new_text.write(text) - dhdl = extract_dHdl(tmpdir.join('text.xvg'), 310) - assert len(dhdl) == length - - def test_truncated_row(self, data, tmpdir): - '''Test the case where the last row has been truncated.''' - text, length = data - new_text = tmpdir.join('text.xvg') - new_text.write(text + '40010.0 27.0\n') - dhdl = extract_dHdl(tmpdir.join('text.xvg'), 310) - assert len(dhdl) == length - - def test_truncated_number(self, data, tmpdir): - '''Test the case where the last row has been truncated and a - has - been left.''' - text, length = data - new_text = tmpdir.join('text.xvg') - new_text.write(text + '40010.0 27.0 -\n') - dhdl = extract_dHdl(tmpdir.join('text.xvg'), 310) - assert len(dhdl) == length - - def test_weirdnumber(self, data, tmpdir): - '''Test the case where the last number has been appended a weird - number.''' - text, length = data - new_text = tmpdir.join('text.xvg') - # Note the 27.040010.0 which is the sum of 27.0 and 40010.0 - new_text.write(text + '40010.0 27.040010.0 27.0 0.0 6.7 13.5 20.2 27.0 0.7 27.0 0.0 6.7 ' - '13.5 20.2 27.0 0.7\n') - dhdl = extract_dHdl(tmpdir.join('text.xvg'), 310) - assert len(dhdl) == length - - def test_too_many_cols(self, data, tmpdir): - '''Test the case where the row has too many columns.''' - text, length = data - new_text = tmpdir.join('text.xvg') - new_text.write(text + - '40010.0 27.0 0.0 6.7 13.5 20.2 27.0 0.7 27.0 0.0 6.7 13.5 20.2 27.0 0.7\n') - dhdl = extract_dHdl(tmpdir.join('text.xvg'), 310) - assert len(dhdl) == length - - diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py deleted file mode 100644 index d445ef0e..00000000 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ /dev/null @@ -1,333 +0,0 
@@ -import numpy as np -import pytest -import os - -from alchemlyb.workflows import ABFE -from alchemtest.gmx import load_ABFE, load_benzene - -class Test_automatic_ABFE(): - '''Test the full automatic workflow for load_ABFE from alchemtest.gmx for - three stage transformation.''' - - @staticmethod - @pytest.fixture(scope='class') - def workflow(): - dir = os.path.dirname(load_ABFE()['data']['complex'][0]) - workflow = ABFE(units='kcal/mol', software='Gromacs', dir=dir, - prefix='dhdl', suffix='xvg', T=310, skiptime=10, - uncorr='dhdl', threshold=50, - methods=('mbar', 'bar', 'ti'), out='./', - overlap='O_MBAR.pdf', - breakdown=True, forwrev=10, log='result.log') - return workflow - - def test_read(self, workflow): - '''test if the files has been loaded correctly.''' - assert len(workflow.u_nk_list) == 30 - assert len(workflow.dHdl_list) == 30 - assert all([len(u_nk) == 1001 for u_nk in workflow.u_nk_list]) - assert all([len(dHdl) == 1001 for dHdl in workflow.dHdl_list]) - - def test_subsample(self, workflow): - '''Test if the data has been shrinked by subsampling.''' - assert len(workflow.u_nk_sample_list) == 30 - assert len(workflow.dHdl_sample_list) == 30 - assert all([len(u_nk) < 1001 for u_nk in workflow.u_nk_sample_list]) - assert all([len(dHdl) < 1001 for dHdl in workflow.dHdl_sample_list]) - - def test_estomator(self, workflow): - '''Test if all three estimator has been used.''' - assert len(workflow.estimator) == 3 - assert 'mbar' in workflow.estimator - assert 'ti' in workflow.estimator - assert 'bar' in workflow.estimator - - def test_summary(self, workflow): - '''Test if if the summary is right.''' - summary = workflow.generate_result() - assert np.isclose(summary['MBAR']['Stages']['TOTAL'], 21.788, 0.1) - - def test_O_MBAR(self, workflow): - '''test if the O_MBAR.pdf has been plotted.''' - assert os.path.isfile('O_MBAR.pdf') - os.remove('O_MBAR.pdf') - - def test_dhdl_TI(self, workflow): - '''test if the dhdl_TI.pdf has been plotted.''' - assert 
os.path.isfile('dhdl_TI.pdf') - os.remove('dhdl_TI.pdf') - - def test_dF_state(self, workflow): - '''test if the dF_state.pdf has been plotted.''' - assert os.path.isfile('dF_state.pdf') - os.remove('dF_state.pdf') - assert os.path.isfile('dF_state_long.pdf') - os.remove('dF_state_long.pdf') - - def test_convergence(self, workflow): - '''test if the dF_state.pdf has been plotted.''' - assert os.path.isfile('dF_t.pdf') - os.remove('dF_t.pdf') - assert len(workflow.convergence) == 10 - -class Test_manual_ABFE(): - '''Test the manual workflow for load_ABFE from alchemtest.gmx for three - stage transformation.''' - - @staticmethod - @pytest.fixture(scope='class') - def workflow(): - dir = os.path.dirname(load_ABFE()['data']['complex'][0]) - workflow = ABFE(software='Gromacs', dir=dir, prefix='dhdl', - suffix='xvg', T=310) - workflow.update_units('kcal/mol') - workflow.preprocess(skiptime=10, uncorr='dhdl', threshold=50) - workflow.estimate(methods=('mbar', 'bar', 'ti')) - workflow.plot_overlap_matrix(overlap='O_MBAR.pdf') - workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') - workflow.plot_dF_state(dF_state='dF_state.pdf') - workflow.check_convergence(10, dF_t='dF_t.pdf') - return workflow - - def test_read(self, workflow): - '''test if the files has been loaded correctly.''' - assert len(workflow.u_nk_list) == 30 - assert len(workflow.dHdl_list) == 30 - assert all([len(u_nk) == 1001 for u_nk in workflow.u_nk_list]) - assert all([len(dHdl) == 1001 for dHdl in workflow.dHdl_list]) - - def test_subsample(self, workflow): - '''Test if the data has been shrinked by subsampling.''' - assert len(workflow.u_nk_sample_list) == 30 - assert len(workflow.dHdl_sample_list) == 30 - assert all([len(u_nk) < 1001 for u_nk in workflow.u_nk_sample_list]) - assert all([len(dHdl) < 1001 for dHdl in workflow.dHdl_sample_list]) - - def test_estomator(self, workflow): - '''Test if all three estimator has been used.''' - assert len(workflow.estimator) == 3 - assert 'mbar' in workflow.estimator - 
assert 'ti' in workflow.estimator - assert 'bar' in workflow.estimator - - def test_O_MBAR(self, workflow): - '''test if the O_MBAR.pdf has been plotted.''' - assert os.path.isfile('O_MBAR.pdf') - os.remove('O_MBAR.pdf') - - def test_dhdl_TI(self, workflow): - '''test if the dhdl_TI.pdf has been plotted.''' - assert os.path.isfile('dhdl_TI.pdf') - os.remove('dhdl_TI.pdf') - - def test_dF_state(self, workflow): - '''test if the dF_state.pdf has been plotted.''' - assert os.path.isfile('dF_state.pdf') - os.remove('dF_state.pdf') - - def test_convergence(self, workflow): - '''test if the dF_state.pdf has been plotted.''' - assert os.path.isfile('dF_t.pdf') - os.remove('dF_t.pdf') - assert len(workflow.convergence) == 10 - - def test_convergence_nosample_u_nk(self, workflow): - '''test if the dF_state.pdf has been plotted.''' - u_nk_sample_list = workflow.u_nk_sample_list - delattr(workflow, 'u_nk_sample_list') - workflow.check_convergence(10) - os.remove('dF_t.pdf') - assert len(workflow.convergence) == 10 - workflow.u_nk_sample_list = u_nk_sample_list - - def test_convergence_nosample_dhdl(self, workflow): - '''test if the dF_state.pdf has been plotted.''' - dHdl_sample_list = workflow.dHdl_sample_list - delattr(workflow, 'dHdl_sample_list') - workflow.check_convergence(10, estimator='ti') - os.remove('dF_t.pdf') - assert len(workflow.convergence) == 10 - workflow.dHdl_sample_list = dHdl_sample_list - - def test_convergence_dhdl(self, workflow): - '''test if the dF_state.pdf has been plotted.''' - workflow.check_convergence(10, estimator='ti') - os.remove('dF_t.pdf') - assert len(workflow.convergence) == 10 - - def test_convergence_TI(self, workflow): - '''test if the dF_state.pdf has been plotted.''' - workflow.check_convergence(10, estimator='ti', dF_t='dF_t.pdf') - assert os.path.isfile('dF_t.pdf') - os.remove('dF_t.pdf') - assert len(workflow.convergence) == 10 - - def test_dhdl_TI_noTI(self, workflow): - '''Test to plot the dhdl_TI when ti estimator is not 
there''' - full_estimator = workflow.estimator - workflow.estimator.pop('ti') - workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') - assert os.path.isfile('dhdl_TI.pdf') == False - workflow.estimator = full_estimator - -class Test_automatic_benzene(): - '''Test the full automatic workflow for load_benzene from alchemtest.gmx for - single stage transformation.''' - - @staticmethod - @pytest.fixture(scope='class') - def workflow(): - dir = os.path.dirname(os.path.dirname( - load_benzene()['data']['Coulomb'][0])) - dir = os.path.join(dir, '*') - workflow = ABFE(units='kcal/mol', software='Gromacs', dir=dir, - prefix='dhdl', suffix='bz2', T=310, skiptime=0, - uncorr='dhdl', threshold=50, - methods=('mbar', 'bar', 'ti'), out='./', - overlap='O_MBAR.pdf', - breakdown=True, forwrev=10, log='result.log') - return workflow - - def test_read(self, workflow): - '''test if the files has been loaded correctly.''' - assert len(workflow.u_nk_list) == 5 - assert len(workflow.dHdl_list) == 5 - assert all([len(u_nk) == 4001 for u_nk in workflow.u_nk_list]) - assert all([len(dHdl) == 4001 for dHdl in workflow.dHdl_list]) - - def test_estomator(self, workflow): - '''Test if all three estimator has been used.''' - assert len(workflow.estimator) == 3 - assert 'mbar' in workflow.estimator - assert 'ti' in workflow.estimator - assert 'bar' in workflow.estimator - - def test_O_MBAR(self, workflow): - '''test if the O_MBAR.pdf has been plotted.''' - assert os.path.isfile('O_MBAR.pdf') - os.remove('O_MBAR.pdf') - - def test_dhdl_TI(self, workflow): - '''test if the dhdl_TI.pdf has been plotted.''' - assert os.path.isfile('dhdl_TI.pdf') - os.remove('dhdl_TI.pdf') - - def test_dF_state(self, workflow): - '''test if the dF_state.pdf has been plotted.''' - assert os.path.isfile('dF_state.pdf') - os.remove('dF_state.pdf') - assert os.path.isfile('dF_state_long.pdf') - os.remove('dF_state_long.pdf') - - def test_convergence(self, workflow): - '''test if the dF_state.pdf has been plotted.''' - assert 
os.path.isfile('dF_t.pdf') - os.remove('dF_t.pdf') - assert len(workflow.convergence) == 10 - -class Test_unpertubed_lambda(): - '''Test the if two lamdas present and one of them is not pertubed. - - fep bound -time fep-lambda bound-lambda -0.0 0.5 0 12.958159 0 -10.0 0.5 0 -1.062968 0 -20.0 0.5 0 1.019020 0 -30.0 0.5 0 5.029051 0 -40.0 0.5 0 7.768072 0 - -Where only fep-lambda changes but the bonded-lambda is always 0. - ''' - - @staticmethod - @pytest.fixture(scope='class') - def workflow(): - dir = os.path.dirname(os.path.dirname( - load_benzene()['data']['Coulomb'][0])) - dir = os.path.join(dir, '*') - workflow = ABFE(software='Gromacs', dir=dir, prefix='dhdl', - suffix='bz2', T=310) - # Block the n_uk - workflow.u_nk_list = [] - # Add another lambda column - for dHdl in workflow.dHdl_list: - dHdl.insert(1, 'bound-lambda', [1.0, ] * len(dHdl)) - dHdl.insert(1, 'bound', [1.0, ] * len(dHdl)) - dHdl.set_index('bound-lambda', append=True, inplace=True) - - workflow.estimate(methods=('ti', )) - workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') - workflow.plot_dF_state(dF_state='dF_state.pdf') - workflow.check_convergence(10, dF_t='dF_t.pdf', estimator='ti') - return workflow - - def test_dhdl_TI(self, workflow): - '''test if the dhdl_TI.pdf has been plotted.''' - assert os.path.isfile('dhdl_TI.pdf') - os.remove('dhdl_TI.pdf') - - def test_dF_state(self, workflow): - '''test if the dF_state.pdf has been plotted.''' - assert os.path.isfile('dF_state.pdf') - os.remove('dF_state.pdf') - - def test_convergence(self, workflow): - '''test if the dF_state.pdf has been plotted.''' - assert os.path.isfile('dF_t.pdf') - os.remove('dF_t.pdf') - assert len(workflow.convergence) == 10 - - def test_single_estimator_mbar(self, workflow): - workflow.estimate(methods='ti') - summary = workflow.generate_result() - assert np.isclose(summary['TI']['Stages']['TOTAL'], 2.946, 0.1) - -class Test_methods(): - '''Test various methods.''' - - @staticmethod - @pytest.fixture(scope='class') - def 
workflow(): - dir = os.path.dirname(os.path.dirname( - load_benzene()['data']['Coulomb'][0])) - dir = os.path.join(dir, '*') - workflow = ABFE(software='Gromacs', dir=dir, prefix='dhdl', - suffix='bz2', T=310) - return workflow - - def test_uncorr_threshold(self, workflow): - original_u_nk = workflow.u_nk_list - original_dHdl = workflow.dHdl_list - workflow.u_nk_list = [u_nk[:40] for u_nk in original_u_nk] - workflow.dHdl_list = [dHdl[:40] for dHdl in original_dHdl] - workflow.preprocess(threshold=50) - assert all([len(u_nk) == 40 for u_nk in workflow.u_nk_sample_list]) - assert all([len(dHdl) == 40 for dHdl in workflow.dHdl_sample_list]) - workflow.u_nk_list = original_u_nk - workflow.dHdl_list = original_dHdl - - def test_single_estimator_mbar(self, workflow): - workflow.estimate(methods='mbar') - assert len(workflow.estimator) == 1 - assert 'mbar' in workflow.estimator - summary = workflow.generate_result() - assert np.isclose(summary['MBAR']['Stages']['TOTAL'], 2.946, 0.1) - - def test_single_estimator_ti(self, workflow): - u_nk_list = workflow.u_nk_list - delattr(workflow, 'u_nk_list') - workflow.estimate(methods='ti') - summary = workflow.generate_result() - assert np.isclose(summary['TI']['Stages']['TOTAL'], 2.946, 0.1) - workflow.u_nk_list = u_nk_list - - def test_bar_convergence(self, workflow): - workflow.check_convergence(10, estimator='bar') - assert os.path.isfile('dF_t.pdf') - os.remove('dF_t.pdf') - - def test_unprocessed_n_uk(self, workflow): - workflow.u_nk_sample_list = [] - workflow.estimate() - assert len(workflow.estimator) == 3 - assert 'mbar' in workflow.estimator diff --git a/src/alchemlyb/workflows/__init__.py b/src/alchemlyb/workflows/__init__.py index 9e81a9e7..5823a467 100644 --- a/src/alchemlyb/workflows/__init__.py +++ b/src/alchemlyb/workflows/__init__.py @@ -1,4 +1,3 @@ __all__ = [ 'base', - 'abfe'. 
] diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py deleted file mode 100644 index 64282b69..00000000 --- a/src/alchemlyb/workflows/abfe.py +++ /dev/null @@ -1,651 +0,0 @@ -import os -from os.path import join -from glob import glob -import pandas as pd -import numpy as np -import logging -import matplotlib.pyplot as plt - -from ..parsing import gmx -from ..preprocessing.subsampling import decorrelate_dhdl, decorrelate_u_nk -from ..estimators import BAR, TI -from ..estimators import AutoMBAR as MBAR -from ..visualisation import (plot_mbar_overlap_matrix, plot_ti_dhdl, - plot_dF_state, plot_convergence) -from ..postprocessors.units import get_unit_converter -from ..convergence import forward_backward_convergence -from .. import concat -from .. import __version__ - - -class ABFE(): - '''Alchemical Analysis style automatic workflow. - - Parameters - ---------- - units : str - The unit used for printing and plotting results. {'kcal/mol', 'kJ/mol', - 'kT'}. Default: 'kT'. - software : str - The software used for generating input. {'Gromacs', } - dir : str - Directory in which data files are stored. Default: os.path.curdir. - prefix : str - Prefix for datafile sets. Default: 'dhdl'. - suffix : str - Suffix for datafile sets. Default: 'xvg'. - T : float - Temperature in K. Default: 298. - skiptime : float - Discard data prior to this specified time as 'equilibration' data. Units - picoseconds. Default: 0. - uncorr : str - The observable to be used for the autocorrelation analysis; 'dhdl' - (obtained as a sum over those energy components that are changing). - Default: `dhdl` - threshold : int - Proceed with correlated samples if the number of uncorrelated samples is - found to be less than this number. If 0 is given, the time series - analysis will not be performed at all. Default: 50. - methods : str - A list of the methods to esitimate the free energy with. Default: None. 
- out : str - Directory in which the output files produced by this script will be - stored. Default: os.path.curdir. - overlap : str - The filename for the plot of overlap matrix. Default: None. (not - plotting). - breakdown : bool - Plot the free energy differences evaluated for each pair of adjacent - states for all methods, including the dH/dlambda curve for TI. Default: - None. (not plotting). - forwrev : int - Plot the free energy change as a function of time in both directions, - with the specified number of points in the time plot. The number of time - points (an integer) must be provided. Default: None. (not doing - convergence analysis). - log : str - The filename of the log file. The workflow logs under - alchemlyb.workflows.ABFE. Default: - 'result.log' - - Attributes - ---------- - logger : Logger - The logging object. - file_list : list - The list of filenames sorted by the lambda state. - u_nk_list : list - The list of u_nk read from the files. - dHdl_list : list - The list of dHdl read from the files. 
- ''' - def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, - prefix='dhdl', suffix='xvg', T=298, skiptime=0, uncorr=None, - threshold=50, methods=None, out=os.path.curdir, - overlap=None, breakdown=None, forwrev=None, - log='result.log'): - - logging.basicConfig(filename=log, level=logging.INFO) - self.logger = logging.getLogger('alchemlyb.workflows.ABFE') - self.logger.info('Initialise Alchemlyb ABFE Workflow') - self.logger.info('Alchemlyb Version: {}'.format(__version__)) - - self.logger.info('Set temperature to {} K.'.format(T)) - self.T = T - self.out = out - - self.update_units(units) - - self.logger.info('Finding files with prefix: {}, suffix: {} under ' - 'directory {} produced by {}'.format(prefix, suffix, - dir, software)) - file_list = glob(join(dir, prefix + '*' + suffix)) - - self.logger.info('Found {} xvg files.'.format(len(file_list))) - self.logger.info('Unsorted file list: \n{}'.format('\n'.join( - file_list))) - - if software.lower() == 'gromacs': - self.logger.info('Using {} parser to read the data.'.format( - software)) - extract_u_nk = gmx.extract_u_nk - extract_dHdl = gmx.extract_dHdl - else: # pragma: no cover - raise NameError('{} parser not found.'.format(software)) - - u_nk_list = [] - dHdl_list = [] - for xvg in file_list: - try: - u_nk = extract_u_nk(xvg, T=T) - self.logger.info( - 'Reading {} lines of u_nk from {}'.format(len(u_nk), xvg)) - u_nk_list.append(u_nk) - except: # pragma: no cover - self.logger.warning( - 'Error reading read u_nk from {}.'.format(xvg)) - - try: - dhdl = extract_dHdl(xvg, T=T) - self.logger.info( - 'Reading {} lines of dhdl from {}'.format(len(dhdl), xvg)) - dHdl_list.append(dhdl) - except: # pragma: no cover - self.logger.warning( - 'Error reading read dhdl from {}.'.format(xvg)) - - # Sort the files according to the state - if len(u_nk_list) > 0: - self.logger.info('Sort files according to the u_nk.') - column_names = u_nk_list[0].columns.values.tolist() - index_list = 
sorted(range(len(file_list)), - key=lambda x:column_names.index( - u_nk_list[x].reset_index('time').index.values[0])) - else: - self.logger.info('Sort files according to the dHdl.') - column_names = sorted([dHdl.reset_index('time').index.values[0] - for dHdl in dHdl_list]) - index_list = sorted(range(len(file_list)), - key=lambda x:column_names.index( - dHdl_list[x].reset_index('time').index.values[0])) - - self.file_list = [file_list[i] for i in index_list] - self.logger.info('Sorted file list: \n{}'.format('\n'.join( - self.file_list))) - self.u_nk_list = [u_nk_list[i] for i in index_list] - self.dHdl_list = [dHdl_list[i] for i in index_list] - - if uncorr is not None: - self.preprocess(skiptime=skiptime, uncorr=uncorr, - threshold=threshold) - if methods is not None: - self.estimate(methods) - self.generate_result() - - if overlap is not None: - ax = self.plot_overlap_matrix(overlap) - plt.close(ax.figure) - - if breakdown: - ax = self.plot_ti_dhdl() - plt.close(ax.figure) - fig = self.plot_dF_state() - plt.close(fig) - fig = self.plot_dF_state(dF_state='dF_state_long.pdf', - orientation='landscape') - plt.close(fig) - - if forwrev is not None: - ax = self.check_convergence(forwrev, estimator='mbar', dF_t='dF_t.pdf') - plt.close(ax.figure) - - - def update_units(self, units): - '''Update the unit. - - Parameters - ---------- - units : str - The unit used for printing and plotting results. {'kcal/mol', - 'kJ/mol', 'kT'} - - ''' - if units is not None: - self.logger.info('Set unit to {}.'.format(units)) - self.units = units - else: # pragma: no cover - pass - - def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): - '''Preprocess the data by removing the equilibration time and - decorrelate the date. - - Parameters - ---------- - skiptime : float - Discard data prior to this specified time as 'equilibration' data. - Units picoseconds. Default: 0. 
- uncorr : str - The observable to be used for the autocorrelation analysis; 'dhdl' - (obtained as a sum over those energy components that are changing). - Default: `dhdl` - threshold : int - Proceed with correlated samples if the number of uncorrelated - samples is found to be less than this number. If 0 is given, the - time series analysis will not be performed at all. Default: 50. - - Attributes - ---------- - u_nk_sample_list : list - The list of u_nk after decorrelation. - dHdl_sample_list : list - The list of dHdl after decorrelation. - ''' - self.logger.info('Start preprocessing with skiptime of {} ' - 'uncorrelation method of {} and ' - 'threshold of {}'.format(skiptime, uncorr, threshold)) - if len(self.u_nk_list) > 0: - self.logger.info( - 'Processing the u_nk data set with skiptime of {}.'.format( - skiptime)) - - self.u_nk_sample_list = [] - for index, u_nk in enumerate(self.u_nk_list): - # Find the starting frame - - u_nk = u_nk[u_nk.index.get_level_values('time') >= skiptime] - subsample = decorrelate_u_nk(u_nk, uncorr) - - if len(subsample) < threshold: - self.logger.warning('Number of u_nk {} for state {} is ' - 'less than the threshold {}.'.format( - len(subsample), index, threshold)) - self.logger.info('Take all the u_nk for state {}.'.format(index)) - self.u_nk_sample_list.append(u_nk) - else: - self.logger.info('Take {} uncorrelated u_nk for state ' - '{}.'.format(len(subsample), index)) - self.u_nk_sample_list.append(subsample) - else: # pragma: no cover - self.logger.info('No u_nk data being subsampled') - - if len(self.dHdl_list) > 0: - self.dHdl_sample_list = [] - for index, dHdl in enumerate(self.dHdl_list): - dHdl = dHdl[dHdl.index.get_level_values('time') >= skiptime] - subsample = decorrelate_dhdl(dHdl) - if len(subsample) < threshold: - self.logger.warning('Number of dHdl {} for state {} is ' - 'less than the threshold {}.'.format( - len(subsample), index, threshold)) - self.logger.info('Take all the dHdl for state {}.'.format(index)) - 
self.dHdl_sample_list.append(dHdl) - else: - self.logger.info('Take {} uncorrelated dHdl for state ' - '{}.'.format(len(subsample), index)) - self.dHdl_sample_list.append(subsample) - else: # pragma: no cover - self.logger.info('No dHdl data being subsampled') - - def estimate(self, methods=('mbar', 'bar', 'ti')): - '''Estimate the free energy using the selected estimator. - - Parameters - ---------- - methods : str - A list of the methods to esitimate the free energy with. Default: - ['TI', 'BAR', 'MBAR']. - - Attributes - ---------- - estimator : dict - The dictionary of estimators. The key for MBAR is 'mbar', for BAR is - 'bar' and for TI is 'ti'. - ''' - # Make estimators into a tuple - if isinstance(methods, str): - methods = (methods, ) - - self.logger.info( - 'Start running estimator: {}.'.format(','.join(methods))) - self.estimator = {} - # Use unprocessed data if preprocess is not performed. - if 'ti' in methods: - try: - dHdl = concat(self.dHdl_sample_list) - except (AttributeError, ValueError): - dHdl = concat(self.dHdl_list) - self.logger.warning('dHdl has not been preprocessed.') - self.logger.info( - 'A total {} lines of dHdl is used.'.format(len(dHdl))) - - if 'bar' in methods or 'mbar' in methods: - try: - u_nk = concat(self.u_nk_sample_list) - except (AttributeError, ValueError): - u_nk = concat(self.u_nk_list) - self.logger.warning('u_nk has not been preprocessed.') - self.logger.info( - 'A total {} lines of u_nk is used.'.format(len(u_nk))) - - for estimator in methods: - if estimator.lower() == 'mbar' and len(u_nk) > 0: - self.logger.info('Run MBAR estimator.') - self.estimator['mbar'] = MBAR().fit(u_nk) - elif estimator.lower() == 'bar' and len(u_nk) > 0: - self.logger.info('Run BAR estimator.') - self.estimator['bar'] = BAR().fit(u_nk) - elif estimator.lower() == 'ti' and len(dHdl) > 0: - self.logger.info('Run TI estimator.') - self.estimator['ti'] = TI().fit(dHdl) - elif estimator.lower() == 'mbar' or estimator.lower() == 'bar': # pragma: no 
cover - self.logger.warning('MBAR or BAR estimator require u_nk') - else: # pragma: no cover - self.logger.warning( - '{} is not a valid estimator.'.format(estimator)) - - def generate_result(self): - '''Summarise the result into a dataframe. - - Returns - ------- - DataFrame - The DataFrame with convergence data. :: - - MBAR MBAR_Error BAR BAR_Error TI TI_Error - States 0 -- 1 0.065967 0.001293 0.066544 0.001661 0.066663 0.001675 - 1 -- 2 0.089774 0.001398 0.089303 0.002101 0.089566 0.002144 - 2 -- 3 0.132036 0.001638 0.132687 0.002990 0.133292 0.003055 - 3 -- 4 0.116494 0.001213 0.116348 0.002691 0.116845 0.002750 - 4 -- 5 0.105251 0.000980 0.106344 0.002337 0.106603 0.002362 - 5 -- 6 0.349320 0.002781 0.343399 0.006839 0.350568 0.007393 - 6 -- 7 0.402346 0.002767 0.391368 0.006641 0.395754 0.006961 - 7 -- 8 0.322284 0.002058 0.319395 0.005333 0.321542 0.005434 - 8 -- 9 0.434999 0.002683 0.425680 0.006823 0.430251 0.007155 - 9 -- 10 0.355672 0.002219 0.350564 0.005472 0.352745 0.005591 - 10 -- 11 3.574227 0.008744 3.513595 0.018711 3.514790 0.018078 - 11 -- 12 2.896685 0.009905 2.821760 0.017844 2.823210 0.018088 - 12 -- 13 2.223769 0.011229 2.188885 0.018438 2.189784 0.018478 - 13 -- 14 1.520978 0.012526 1.493598 0.019155 1.490070 0.019288 - 14 -- 15 0.911279 0.009527 0.894878 0.015023 0.896010 0.015140 - 15 -- 16 0.892365 0.010558 0.886706 0.015260 0.884698 0.015392 - 16 -- 17 1.737971 0.025315 1.720643 0.031416 1.741028 0.030624 - 17 -- 18 1.790706 0.025560 1.788112 0.029435 1.801695 0.029244 - 18 -- 19 1.998635 0.023340 2.007404 0.027447 2.019213 0.027096 - 19 -- 20 2.263475 0.020286 2.265322 0.025023 2.282040 0.024566 - 20 -- 21 2.565680 0.016695 2.561324 0.023611 2.552977 0.023753 - 21 -- 22 1.384094 0.007553 1.385837 0.011672 1.381999 0.011991 - 22 -- 23 1.428567 0.007504 1.422689 0.012524 1.416010 0.013012 - 23 -- 24 1.440581 0.008059 1.412517 0.013125 1.408267 0.013539 - 24 -- 25 1.411329 0.009022 1.419167 0.013356 1.411446 0.013795 - 25 -- 26 1.340320 
0.010167 1.360679 0.015213 1.356953 0.015260 - 26 -- 27 1.243745 0.011239 1.245873 0.015711 1.248959 0.015762 - 27 -- 28 1.128429 0.012859 1.124554 0.016999 1.121892 0.016962 - 28 -- 29 1.010313 0.016442 1.005444 0.017692 1.019747 0.017257 - Stages coul 10.215658 0.033903 10.017838 0.041839 10.017854 0.048744 - vdw 22.547489 0.098699 22.501150 0.060092 22.542936 0.106723 - bonded 2.374144 0.014995 2.341631 0.005507 2.363828 0.021078 - TOTAL 35.137291 0.103580 34.860619 0.087022 34.924618 0.119206 - - Attributes - ---------- - summary : Dataframe - The summary of the free energy estimate. - ''' - - # Write estimate - self.logger.info('Summarise the estimate into a dataframe.') - # Make the header name - self.logger.info('Generate the row names.') - eitimator_names = list(self.estimator.keys()) - num_states = len(self.estimator[eitimator_names[0]].states_) - data_dict = {'name': [], - 'state': []} - for i in range(num_states - 1): - data_dict['name'].append(str(i) + ' -- ' + str(i+1)) - data_dict['state'].append('States') - - try: - u_nk = self.u_nk_list[0] - stages = u_nk.reset_index('time').index.names - self.logger.info('use the stage name from u_nk') - except: - try: - dHdl = self.dHdl_list[0] - stages = dHdl.reset_index('time').index.names - self.logger.info('use the stage name from dHdl') - except: # pragma: no cover - stages = [] - self.logger.warning('No stage name found in dHdl or u_nk') - for stage in stages: - data_dict['name'].append(stage.split('-')[0]) - data_dict['state'].append('Stages') - data_dict['name'].append('TOTAL') - data_dict['state'].append('Stages') - - col_names = [] - for estimator_name, estimator in self.estimator.items(): - self.logger.info('Read the results from estimator {}'.format( - estimator_name)) - - # Do the unit conversion - delta_f_ = estimator.delta_f_ - d_delta_f_ = estimator.d_delta_f_ - # Write the estimator header - - col_names.append(estimator_name.upper()) - col_names.append(estimator_name.upper() + '_Error') - 
data_dict[estimator_name.upper()] = [] - data_dict[estimator_name.upper() + '_Error'] = [] - for index in range(1, num_states): - data_dict[estimator_name.upper()].append( - delta_f_.iloc[index-1, index]) - data_dict[estimator_name.upper() + '_Error'].append( - d_delta_f_.iloc[index - 1, index]) - - self.logger.info('Generate the staged result from estimator {' - '}'.format( - estimator_name)) - for index, stage in enumerate(stages): - if len(stages) == 1: - start = 0 - end = len(estimator.states_) - 1 - else: - # Get the start and the end of the state - lambda_min = min( - [state[index] for state in estimator.states_]) - lambda_max = max( - [state[index] for state in estimator.states_]) - if lambda_min == lambda_max: - # Deal with the case where a certain lambda is used but - # not perturbed - start = 0 - end = 0 - else: - states = [state[index] for state in estimator.states_] - start = list(reversed(states)).index(lambda_min) - start = num_states - start - 1 - end = states.index(lambda_max) - self.logger.info( - 'Stage {} is from state {} to state {}.'.format( - stage, start, end)) - result = delta_f_.iloc[start, end] - if estimator_name != 'bar': - error = d_delta_f_.iloc[start, end] - else: - error = np.sqrt(sum( - [d_delta_f_.iloc[start, start+1]**2 - for i in range(start, end + 1)])) - data_dict[estimator_name.upper()].append(result) - data_dict[estimator_name.upper() + '_Error'].append(error) - - # Total result - result = delta_f_.iloc[0, -1] - if estimator_name != 'bar': - error = d_delta_f_.iloc[0, -1] - else: - error = np.sqrt(sum( - [d_delta_f_.iloc[i, i + 1] ** 2 - for i in range(num_states - 1)])) - data_dict[estimator_name.upper()].append(result) - data_dict[estimator_name.upper() + '_Error'].append(error) - summary = pd.DataFrame.from_dict(data_dict) - - summary = summary.set_index(['state', 'name']) - # Make sure that the columns are in the right order - summary = summary[col_names] - # Remove the name of the index column to make it prettier - 
summary.index.names = [None, None] - - summary.attrs = estimator.delta_f_.attrs - converter = get_unit_converter(self.units) - summary = converter(summary) - self.summary = summary - self.logger.info('Write results:\n{}'.format(summary.to_string())) - return summary - - def plot_overlap_matrix(self, overlap='O_MBAR.pdf', ax=None): - '''Plot the overlap matrix for MBAR estimator using - :func:`~alchemlyb.visualisation.plot_mbar_overlap_matrix`. - - Parameters - ---------- - overlap : str - The filename for the plot of overlap matrix. Default: 'O_MBAR.pdf' - ax : matplotlib.axes.Axes - Matplotlib axes object where the plot will be drawn on. If ax=None, - a new axes will be generated. - - Returns - ------- - matplotlib.axes.Axes - An axes with the overlap matrix drawn. - ''' - self.logger.info('Plot overlap matrix.') - if 'mbar' in self.estimator: - ax = plot_mbar_overlap_matrix(self.estimator['mbar'].overlap_matrix, - ax=ax) - ax.figure.savefig(join(self.out, overlap)) - self.logger.info('Plot overlap matrix to {} under {}.' - ''.format(self.out, overlap)) - return ax - else: # pragma: no cover - self.logger.warning('MBAR estimator not found. ' - 'Overlap matrix not plotted.') - - def plot_ti_dhdl(self, dhdl_TI='dhdl_TI.pdf', labels=None, colors=None, - ax=None): - '''Plot the dHdl for TI estimator using - :func:`~alchemlyb.visualisation.plot_ti_dhdl`. - - Parameters - ---------- - dhdl_TI : str - The filename for the plot of TI dHdl. Default: 'dhdl_TI.pdf' - labels : List - list of labels for labelling all the alchemical transformations. - colors : List - list of colors for plotting all the alchemical transformations. - Default: ['r', 'g', '#7F38EC', '#9F000F', 'b', 'y'] - ax : matplotlib.axes.Axes - Matplotlib axes object where the plot will be drawn on. If ax=None, - a new axes will be generated. - - Returns - ------- - matplotlib.axes.Axes - An axes with the TI dhdl drawn. 
- ''' - self.logger.info('Plot TI dHdl.') - if 'ti' in self.estimator: - ax = plot_ti_dhdl(self.estimator['ti'], units=self.units, - labels=labels, colors=colors, ax=ax) - ax.figure.savefig(join(self.out, dhdl_TI)) - self.logger.info('Plot TI dHdl to {} under {}.' - ''.format(dhdl_TI, self.out)) - return ax - - def plot_dF_state(self, dF_state='dF_state.pdf', labels=None, colors=None, - orientation='portrait', nb=10): - '''Plot the dF states using - :func:`~alchemlyb.visualisation.plot_dF_state`. - - Parameters - ---------- - dF_state : str - The filename for the plot of dF states. Default: 'dF_state.pdf' - labels : List - list of labels for labelling different estimators. - colors : List - list of colors for plotting different estimators. - orientation : string - The orientation of the figure. Can be `portrait` or `landscape` - nb : int - Maximum number of dF states in one row in the `portrait` mode - - Returns - ------- - matplotlib.figure.Figure - An Figure with the dF states drawn. - ''' - self.logger.info('Plot dF states.') - fig = plot_dF_state(self.estimator.values(), labels=labels, colors=colors, - units=self.units, - orientation=orientation, nb=nb) - fig.savefig(join(self.out, dF_state)) - self.logger.info('Plot dF state to {} under {}.' - ''.format(dF_state, self.out)) - return fig - - def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', - ax=None): - '''Compute the forward and backward convergence using - :func:`~alchemlyb.convergence.forward_backward_convergence`and - plotted with - :func:`~alchemlyb.visualisation.plot_convergence`. - - Parameters - ---------- - forwrev : int - Plot the free energy change as a function of time in both - directions, with the specified number of points in the time plot. - The number of time points (an integer) must be provided. - estimator : str - The estimator used for convergence analysis. Default: 'mbar' - dF_t : str - The filename for the plot of convergence. 
Default: 'dF_t.pdf' - ax : matplotlib.axes.Axes - Matplotlib axes object where the plot will be drawn on. If ax=None, - a new axes will be generated. - - Attributes - ---------- - convergence : DataFrame - - Returns - ------- - matplotlib.axes.Axes - An axes with the convergence drawn. - ''' - self.logger.info('Start convergence analysis.') - self.logger.info('Check data availability.') - - if estimator.lower() in ['mbar', 'bar']: - try: - u_nk_list = self.u_nk_sample_list - self.logger.info('Subsampled u_nk is available.') - except AttributeError: - try: - u_nk_list = self.u_nk_list - self.logger.info('Subsampled u_nk not available, ' - 'use original data instead.') - except AttributeError: # pragma: no cover - self.logger.warning('u_nk is not available.') - convergence = forward_backward_convergence(u_nk_list, - estimator=estimator, - num=forwrev) - else: - try: - dHdl_list = self.dHdl_sample_list - self.logger.info('Subsampled dHdl is available.') - except AttributeError: - try: - dHdl_list = self.dHdl_list - self.logger.info('Subsampled dHdl not available, ' - 'use original data instead.') - except AttributeError: # pragma: no cover - self.logger.warning('dHdl is not available.') - convergence = forward_backward_convergence(dHdl_list, - estimator=estimator, - num=forwrev) - - self.convergence = get_unit_converter(self.units)(convergence) - - self.logger.info('Plot convergence analysis to {} under {}.' 
- ''.format(dF_t, self.out)) - - ax = plot_convergence(self.convergence, - units=self.units, ax=ax) - ax.figure.savefig(join(self.out, dF_t)) - return ax From b173d9366d401d6b40a7e5ada0d5bdede5a20fc9 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 13 Mar 2022 12:17:47 +0000 Subject: [PATCH 066/123] Update base.py --- src/alchemlyb/workflows/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/alchemlyb/workflows/base.py b/src/alchemlyb/workflows/base.py index 57d347f1..47f67a67 100644 --- a/src/alchemlyb/workflows/base.py +++ b/src/alchemlyb/workflows/base.py @@ -57,7 +57,7 @@ def run(self, *args, **kwargs): """ self.read() - self.subsample() + self.preprocess() self.estimate() self.convergence() self.plot() @@ -78,7 +78,7 @@ def read(self, *args, **kwargs): self.u_nk_list = [] self.dHdl_list = [] - def subsample(self, *args, **kwargs): + def preprocess(self, *args, **kwargs): """ The function that subsample the u_nk and dHdl in `u_nk_list` and `dHdl_list`. From 9ddc811d7205e9cca0c577c1326b85f79a0d1d87 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 13 Mar 2022 12:18:14 +0000 Subject: [PATCH 067/123] Update alchemlyb.workflows.base.rst --- docs/workflows/alchemlyb.workflows.base.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/workflows/alchemlyb.workflows.base.rst b/docs/workflows/alchemlyb.workflows.base.rst index 849c210e..971cfd61 100644 --- a/docs/workflows/alchemlyb.workflows.base.rst +++ b/docs/workflows/alchemlyb.workflows.base.rst @@ -15,7 +15,7 @@ step-by-step fashion. 
:: >>> from alchemlyb.workflows.base import WorkflowBase >>> workflow = WorkflowBase() >>> workflow.read() - >>> workflow.subsample() + >>> workflow.preprocess() >>> workflow.estimate() >>> workflow.convergence() >>> workflow.plot() From 3dc8c2af4102f52e747f252ac4ee15341b2710c4 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 13 Mar 2022 19:31:03 +0000 Subject: [PATCH 068/123] update --- docs/workflows/alchemlyb.workflows.base.rst | 2 +- src/alchemlyb/workflows/base.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/workflows/alchemlyb.workflows.base.rst b/docs/workflows/alchemlyb.workflows.base.rst index 971cfd61..6595af9c 100644 --- a/docs/workflows/alchemlyb.workflows.base.rst +++ b/docs/workflows/alchemlyb.workflows.base.rst @@ -17,7 +17,7 @@ step-by-step fashion. :: >>> workflow.read() >>> workflow.preprocess() >>> workflow.estimate() - >>> workflow.convergence() + >>> workflow.check_convergence() >>> workflow.plot() API Reference diff --git a/src/alchemlyb/workflows/base.py b/src/alchemlyb/workflows/base.py index 47f67a67..97213916 100644 --- a/src/alchemlyb/workflows/base.py +++ b/src/alchemlyb/workflows/base.py @@ -107,7 +107,7 @@ def estimate(self, *args, **kwargs): """ self.result = pd.DataFrame() - def convergence(self, *args, **kwargs): + def check_convergence(self, *args, **kwargs): """ The function for doing convergence analysis. 
Attributes From ecd29eeae6db10b155102e1f024376af6d73ea41 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 27 Mar 2022 11:07:35 +0100 Subject: [PATCH 069/123] update --- src/alchemlyb/tests/test_workflow_ABFE.py | 163 ++++++++-------------- src/alchemlyb/workflows/__init__.py | 2 +- src/alchemlyb/workflows/abfe.py | 44 +++--- 3 files changed, 83 insertions(+), 126 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index d445ef0e..54bdd106 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -1,8 +1,9 @@ import numpy as np import pytest import os +from unittest.mock import patch -from alchemlyb.workflows import ABFE +from alchemlyb.workflows.abfe import ABFE from alchemtest.gmx import load_ABFE, load_benzene class Test_automatic_ABFE(): @@ -10,15 +11,15 @@ class Test_automatic_ABFE(): three stage transformation.''' @staticmethod - @pytest.fixture(scope='class') - def workflow(): + @pytest.fixture(scope='session') + def workflow(tmp_path_factory): + outdir = tmp_path_factory.mktemp("out") dir = os.path.dirname(load_ABFE()['data']['complex'][0]) workflow = ABFE(units='kcal/mol', software='Gromacs', dir=dir, - prefix='dhdl', suffix='xvg', T=310, skiptime=10, - uncorr='dhdl', threshold=50, - methods=('mbar', 'bar', 'ti'), out='./', - overlap='O_MBAR.pdf', - breakdown=True, forwrev=10, log='result.log') + prefix='dhdl', suffix='xvg', T=310, out=str(outdir)) + workflow.run(skiptime=10, uncorr='dhdl', threshold=50, + methods=('mbar', 'bar', 'ti'), overlap='O_MBAR.pdf', + breakdown=True, forwrev=10) return workflow def test_read(self, workflow): @@ -35,7 +36,7 @@ def test_subsample(self, workflow): assert all([len(u_nk) < 1001 for u_nk in workflow.u_nk_sample_list]) assert all([len(dHdl) < 1001 for dHdl in workflow.dHdl_sample_list]) - def test_estomator(self, workflow): + def test_estimator(self, workflow): '''Test if all three estimator has been used.''' 
assert len(workflow.estimator) == 3 assert 'mbar' in workflow.estimator @@ -45,29 +46,26 @@ def test_estomator(self, workflow): def test_summary(self, workflow): '''Test if if the summary is right.''' summary = workflow.generate_result() - assert np.isclose(summary['MBAR']['Stages']['TOTAL'], 21.788, 0.1) + assert np.isclose(summary['MBAR']['Stages']['TOTAL'], 21.8, 0.1) + assert np.isclose(summary['TI']['Stages']['TOTAL'], 21.8, 0.1) + assert np.isclose(summary['BAR']['Stages']['TOTAL'], 21.8, 0.1) def test_O_MBAR(self, workflow): '''test if the O_MBAR.pdf has been plotted.''' - assert os.path.isfile('O_MBAR.pdf') - os.remove('O_MBAR.pdf') + assert os.path.isfile(os.path.join(workflow.out, 'O_MBAR.pdf')) def test_dhdl_TI(self, workflow): '''test if the dhdl_TI.pdf has been plotted.''' - assert os.path.isfile('dhdl_TI.pdf') - os.remove('dhdl_TI.pdf') + assert os.path.isfile(os.path.join(workflow.out, 'dhdl_TI.pdf')) def test_dF_state(self, workflow): '''test if the dF_state.pdf has been plotted.''' - assert os.path.isfile('dF_state.pdf') - os.remove('dF_state.pdf') - assert os.path.isfile('dF_state_long.pdf') - os.remove('dF_state_long.pdf') + assert os.path.isfile(os.path.join(workflow.out, 'dF_state.pdf')) + assert os.path.isfile(os.path.join(workflow.out, 'dF_state_long.pdf')) - def test_convergence(self, workflow): + def test_check_convergence(self, workflow): '''test if the dF_state.pdf has been plotted.''' - assert os.path.isfile('dF_t.pdf') - os.remove('dF_t.pdf') + assert os.path.isfile(os.path.join(workflow.out, 'dF_t.pdf')) assert len(workflow.convergence) == 10 class Test_manual_ABFE(): @@ -75,12 +73,14 @@ class Test_manual_ABFE(): stage transformation.''' @staticmethod - @pytest.fixture(scope='class') - def workflow(): + @pytest.fixture(scope='session') + def workflow(tmp_path_factory): + outdir = tmp_path_factory.mktemp("out") dir = os.path.dirname(load_ABFE()['data']['complex'][0]) workflow = ABFE(software='Gromacs', dir=dir, prefix='dhdl', - 
suffix='xvg', T=310) + suffix='xvg', T=310, out=str(outdir)) workflow.update_units('kcal/mol') + workflow.read() workflow.preprocess(skiptime=10, uncorr='dhdl', threshold=50) workflow.estimate(methods=('mbar', 'bar', 'ti')) workflow.plot_overlap_matrix(overlap='O_MBAR.pdf') @@ -103,7 +103,7 @@ def test_subsample(self, workflow): assert all([len(u_nk) < 1001 for u_nk in workflow.u_nk_sample_list]) assert all([len(dHdl) < 1001 for dHdl in workflow.dHdl_sample_list]) - def test_estomator(self, workflow): + def test_estimator(self, workflow): '''Test if all three estimator has been used.''' assert len(workflow.estimator) == 3 assert 'mbar' in workflow.estimator @@ -112,80 +112,45 @@ def test_estomator(self, workflow): def test_O_MBAR(self, workflow): '''test if the O_MBAR.pdf has been plotted.''' - assert os.path.isfile('O_MBAR.pdf') - os.remove('O_MBAR.pdf') + assert os.path.isfile(os.path.join(workflow.out, 'O_MBAR.pdf')) def test_dhdl_TI(self, workflow): '''test if the dhdl_TI.pdf has been plotted.''' - assert os.path.isfile('dhdl_TI.pdf') - os.remove('dhdl_TI.pdf') + assert os.path.isfile(os.path.join(workflow.out, 'dhdl_TI.pdf')) def test_dF_state(self, workflow): '''test if the dF_state.pdf has been plotted.''' - assert os.path.isfile('dF_state.pdf') - os.remove('dF_state.pdf') + assert os.path.isfile(os.path.join(workflow.out, 'dF_state.pdf')) def test_convergence(self, workflow): '''test if the dF_state.pdf has been plotted.''' - assert os.path.isfile('dF_t.pdf') - os.remove('dF_t.pdf') - assert len(workflow.convergence) == 10 - - def test_convergence_nosample_u_nk(self, workflow): - '''test if the dF_state.pdf has been plotted.''' - u_nk_sample_list = workflow.u_nk_sample_list - delattr(workflow, 'u_nk_sample_list') - workflow.check_convergence(10) - os.remove('dF_t.pdf') - assert len(workflow.convergence) == 10 - workflow.u_nk_sample_list = u_nk_sample_list - - def test_convergence_nosample_dhdl(self, workflow): - '''test if the dF_state.pdf has been 
plotted.''' - dHdl_sample_list = workflow.dHdl_sample_list - delattr(workflow, 'dHdl_sample_list') - workflow.check_convergence(10, estimator='ti') - os.remove('dF_t.pdf') - assert len(workflow.convergence) == 10 - workflow.dHdl_sample_list = dHdl_sample_list - - def test_convergence_dhdl(self, workflow): - '''test if the dF_state.pdf has been plotted.''' - workflow.check_convergence(10, estimator='ti') - os.remove('dF_t.pdf') + assert os.path.isfile(os.path.join(workflow.out, 'dF_t.pdf')) assert len(workflow.convergence) == 10 def test_convergence_TI(self, workflow): '''test if the dF_state.pdf has been plotted.''' - workflow.check_convergence(10, estimator='ti', dF_t='dF_t.pdf') - assert os.path.isfile('dF_t.pdf') - os.remove('dF_t.pdf') + workflow.check_convergence(10, estimator='ti', dF_t='test_dF_t.pdf') + assert os.path.isfile(os.path.join(workflow.out, 'test_dF_t.pdf')) assert len(workflow.convergence) == 10 - def test_dhdl_TI_noTI(self, workflow): - '''Test to plot the dhdl_TI when ti estimator is not there''' - full_estimator = workflow.estimator - workflow.estimator.pop('ti') - workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') - assert os.path.isfile('dhdl_TI.pdf') == False - workflow.estimator = full_estimator class Test_automatic_benzene(): '''Test the full automatic workflow for load_benzene from alchemtest.gmx for single stage transformation.''' @staticmethod - @pytest.fixture(scope='class') - def workflow(): + @pytest.fixture(scope='session') + def workflow(tmp_path_factory): + outdir = tmp_path_factory.mktemp("out") dir = os.path.dirname(os.path.dirname( load_benzene()['data']['Coulomb'][0])) dir = os.path.join(dir, '*') workflow = ABFE(units='kcal/mol', software='Gromacs', dir=dir, - prefix='dhdl', suffix='bz2', T=310, skiptime=0, - uncorr='dhdl', threshold=50, - methods=('mbar', 'bar', 'ti'), out='./', - overlap='O_MBAR.pdf', - breakdown=True, forwrev=10, log='result.log') + prefix='dhdl', suffix='bz2', T=310, + out=outdir) + workflow.run(skiptime=0, 
uncorr='dhdl', threshold=50, + methods=('mbar', 'bar', 'ti'), overlap='O_MBAR.pdf', + breakdown=True, forwrev=10) return workflow def test_read(self, workflow): @@ -195,8 +160,8 @@ def test_read(self, workflow): assert all([len(u_nk) == 4001 for u_nk in workflow.u_nk_list]) assert all([len(dHdl) == 4001 for dHdl in workflow.dHdl_list]) - def test_estomator(self, workflow): - '''Test if all three estimator has been used.''' + def test_estimator(self, workflow): + '''Test if all three estimators have been used.''' assert len(workflow.estimator) == 3 assert 'mbar' in workflow.estimator assert 'ti' in workflow.estimator @@ -204,25 +169,19 @@ def test_estomator(self, workflow): def test_O_MBAR(self, workflow): '''test if the O_MBAR.pdf has been plotted.''' - assert os.path.isfile('O_MBAR.pdf') - os.remove('O_MBAR.pdf') + assert os.path.isfile(os.path.join(workflow.out, 'O_MBAR.pdf')) def test_dhdl_TI(self, workflow): '''test if the dhdl_TI.pdf has been plotted.''' - assert os.path.isfile('dhdl_TI.pdf') - os.remove('dhdl_TI.pdf') + assert os.path.isfile(os.path.join(workflow.out, 'dhdl_TI.pdf')) def test_dF_state(self, workflow): '''test if the dF_state.pdf has been plotted.''' - assert os.path.isfile('dF_state.pdf') - os.remove('dF_state.pdf') - assert os.path.isfile('dF_state_long.pdf') - os.remove('dF_state_long.pdf') + assert os.path.isfile(os.path.join(workflow.out, 'dF_state.pdf')) def test_convergence(self, workflow): '''test if the dF_state.pdf has been plotted.''' - assert os.path.isfile('dF_t.pdf') - os.remove('dF_t.pdf') + assert os.path.isfile(os.path.join(workflow.out, 'dF_t.pdf')) assert len(workflow.convergence) == 10 class Test_unpertubed_lambda(): @@ -240,13 +199,15 @@ class Test_unpertubed_lambda(): ''' @staticmethod - @pytest.fixture(scope='class') - def workflow(): + @pytest.fixture(scope='session') + def workflow(tmp_path_factory): + outdir = tmp_path_factory.mktemp("out") dir = os.path.dirname(os.path.dirname( load_benzene()['data']['Coulomb'][0])) 
dir = os.path.join(dir, '*') workflow = ABFE(software='Gromacs', dir=dir, prefix='dhdl', - suffix='bz2', T=310) + suffix='bz2', T=310, out=outdir) + workflow.read() # Block the n_uk workflow.u_nk_list = [] # Add another lambda column @@ -263,18 +224,15 @@ def workflow(): def test_dhdl_TI(self, workflow): '''test if the dhdl_TI.pdf has been plotted.''' - assert os.path.isfile('dhdl_TI.pdf') - os.remove('dhdl_TI.pdf') + assert os.path.isfile(os.path.join(workflow.out, 'dhdl_TI.pdf')) def test_dF_state(self, workflow): '''test if the dF_state.pdf has been plotted.''' - assert os.path.isfile('dF_state.pdf') - os.remove('dF_state.pdf') + assert os.path.isfile(os.path.join(workflow.out, 'dF_state.pdf')) def test_convergence(self, workflow): '''test if the dF_state.pdf has been plotted.''' - assert os.path.isfile('dF_t.pdf') - os.remove('dF_t.pdf') + assert os.path.isfile(os.path.join(workflow.out, 'dF_t.pdf')) assert len(workflow.convergence) == 10 def test_single_estimator_mbar(self, workflow): @@ -293,18 +251,17 @@ def workflow(): dir = os.path.join(dir, '*') workflow = ABFE(software='Gromacs', dir=dir, prefix='dhdl', suffix='bz2', T=310) + workflow.read() return workflow - def test_uncorr_threshold(self, workflow): - original_u_nk = workflow.u_nk_list - original_dHdl = workflow.dHdl_list - workflow.u_nk_list = [u_nk[:40] for u_nk in original_u_nk] - workflow.dHdl_list = [dHdl[:40] for dHdl in original_dHdl] + def test_uncorr_threshold(self, workflow, monkeypatch): + monkeypatch.setattr(workflow.u_nk_list, + [u_nk[:40] for u_nk in workflow.u_nk_list]) + monkeypatch.setattr(workflow.dHdl_list, + [dHdl[:40] for dHdl in workflow.dHdl_list]) workflow.preprocess(threshold=50) assert all([len(u_nk) == 40 for u_nk in workflow.u_nk_sample_list]) assert all([len(dHdl) == 40 for dHdl in workflow.dHdl_sample_list]) - workflow.u_nk_list = original_u_nk - workflow.dHdl_list = original_dHdl def test_single_estimator_mbar(self, workflow): workflow.estimate(methods='mbar') @@ -314,8 
+271,6 @@ def test_single_estimator_mbar(self, workflow): assert np.isclose(summary['MBAR']['Stages']['TOTAL'], 2.946, 0.1) def test_single_estimator_ti(self, workflow): - u_nk_list = workflow.u_nk_list - delattr(workflow, 'u_nk_list') workflow.estimate(methods='ti') summary = workflow.generate_result() assert np.isclose(summary['TI']['Stages']['TOTAL'], 2.946, 0.1) diff --git a/src/alchemlyb/workflows/__init__.py b/src/alchemlyb/workflows/__init__.py index 9e81a9e7..1fc942bb 100644 --- a/src/alchemlyb/workflows/__init__.py +++ b/src/alchemlyb/workflows/__init__.py @@ -1,4 +1,4 @@ __all__ = [ 'base', - 'abfe'. + 'abfe', ] diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 64282b69..b1d6568d 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -6,6 +6,7 @@ import logging import matplotlib.pyplot as plt +from .base import WorkflowBase from ..parsing import gmx from ..preprocessing.subsampling import decorrelate_dhdl, decorrelate_u_nk from ..estimators import BAR, TI @@ -18,7 +19,7 @@ from .. import __version__ -class ABFE(): +class ABFE(WorkflowBase): '''Alchemical Analysis style automatic workflow. Parameters @@ -81,44 +82,41 @@ class ABFE(): The list of dHdl read from the files. 
''' def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, - prefix='dhdl', suffix='xvg', T=298, skiptime=0, uncorr=None, - threshold=50, methods=None, out=os.path.curdir, - overlap=None, breakdown=None, forwrev=None, - log='result.log'): + prefix='dhdl', suffix='xvg', T=298, out=os.path.curdir): - logging.basicConfig(filename=log, level=logging.INFO) + super().__init__(units, software, T, out) self.logger = logging.getLogger('alchemlyb.workflows.ABFE') self.logger.info('Initialise Alchemlyb ABFE Workflow') self.logger.info('Alchemlyb Version: {}'.format(__version__)) - - self.logger.info('Set temperature to {} K.'.format(T)) - self.T = T - self.out = out + self.logger.info('Set Temperature to {} K.'.format(T)) + self.logger.info('Set Software to {}.'.format(software)) self.update_units(units) self.logger.info('Finding files with prefix: {}, suffix: {} under ' 'directory {} produced by {}'.format(prefix, suffix, dir, software)) - file_list = glob(join(dir, prefix + '*' + suffix)) + self.file_list = glob(join(dir, prefix + '*' + suffix)) - self.logger.info('Found {} xvg files.'.format(len(file_list))) + self.logger.info('Found {} xvg files.'.format(len(self.file_list))) self.logger.info('Unsorted file list: \n{}'.format('\n'.join( - file_list))) + self.file_list))) if software.lower() == 'gromacs': self.logger.info('Using {} parser to read the data.'.format( software)) - extract_u_nk = gmx.extract_u_nk - extract_dHdl = gmx.extract_dHdl + self._extract_u_nk = gmx.extract_u_nk + self._extract_dHdl = gmx.extract_dHdl else: # pragma: no cover raise NameError('{} parser not found.'.format(software)) + def read(self): + u_nk_list = [] dHdl_list = [] - for xvg in file_list: + for xvg in self.file_list: try: - u_nk = extract_u_nk(xvg, T=T) + u_nk = self._extract_u_nk(xvg, T=self.T) self.logger.info( 'Reading {} lines of u_nk from {}'.format(len(u_nk), xvg)) u_nk_list.append(u_nk) @@ -127,7 +125,7 @@ def __init__(self, units='kT', software='Gromacs', 
dir=os.path.curdir, 'Error reading read u_nk from {}.'.format(xvg)) try: - dhdl = extract_dHdl(xvg, T=T) + dhdl = self._extract_dHdl(xvg, T=self.T) self.logger.info( 'Reading {} lines of dhdl from {}'.format(len(dhdl), xvg)) dHdl_list.append(dhdl) @@ -139,23 +137,27 @@ def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, if len(u_nk_list) > 0: self.logger.info('Sort files according to the u_nk.') column_names = u_nk_list[0].columns.values.tolist() - index_list = sorted(range(len(file_list)), + index_list = sorted(range(len(self.file_list)), key=lambda x:column_names.index( u_nk_list[x].reset_index('time').index.values[0])) else: self.logger.info('Sort files according to the dHdl.') column_names = sorted([dHdl.reset_index('time').index.values[0] for dHdl in dHdl_list]) - index_list = sorted(range(len(file_list)), + index_list = sorted(range(len(self.file_list)), key=lambda x:column_names.index( dHdl_list[x].reset_index('time').index.values[0])) - self.file_list = [file_list[i] for i in index_list] + self.file_list = [self.file_list[i] for i in index_list] self.logger.info('Sorted file list: \n{}'.format('\n'.join( self.file_list))) self.u_nk_list = [u_nk_list[i] for i in index_list] self.dHdl_list = [dHdl_list[i] for i in index_list] + def run(self, skiptime=0, uncorr=None, threshold=50, methods=None, + overlap=None, breakdown=None, forwrev=None, *args, **kwargs): + self.read() + if uncorr is not None: self.preprocess(skiptime=skiptime, uncorr=uncorr, threshold=threshold) From 0b37cf425e7ceb6ae9bbed1a0b889a0c8af9b844 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 27 Mar 2022 11:09:19 +0100 Subject: [PATCH 070/123] Update base.py --- src/alchemlyb/workflows/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alchemlyb/workflows/base.py b/src/alchemlyb/workflows/base.py index 97213916..071e586e 100644 --- a/src/alchemlyb/workflows/base.py +++ b/src/alchemlyb/workflows/base.py @@ -59,7 +59,7 @@ def run(self, *args, 
**kwargs): self.read() self.preprocess() self.estimate() - self.convergence() + self.check_convergence() self.plot() def read(self, *args, **kwargs): From dd74acf855d6e280935ad52ea4b29c299b2e1cc6 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 10 Apr 2022 10:31:36 +0100 Subject: [PATCH 071/123] update --- CHANGES | 25 ++++++++++++++++--------- src/alchemlyb/tests/test_workflow.py | 2 +- src/alchemlyb/workflows/base.py | 2 +- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/CHANGES b/CHANGES index 824dcc56..622dcf1d 100644 --- a/CHANGES +++ b/CHANGES @@ -13,12 +13,19 @@ The rules for this file: * release numbers follow "Semantic Versioning" https://semver.org ------------------------------------------------------------------------------ -??/??/2022 xiki-tempula - * 0.6.1 + +*/*/2022 xiki-tempula + + * 0.7.0 + +Enhancements + - Add a base class for the workflow. + - Add filter function to gmx.extract to make it more robust. + Fixes - added AutoMBAR to convergence analysis (#189) - + 12/28/2021 schlaicha, xiki-tempula, jhenin, ttjoseph, orbeckst @@ -66,7 +73,7 @@ Fixes Changes - alchemlyb adopts NEP29 https://numpy.org/neps/nep-0029-deprecation_policy.html - to effectively only support the last 2-3 minor Python versions + to effectively only support the last 2-3 minor Python versions - Removed support for Python 2.7 and 3.6 (issue #130, #140): supported/tested Python versions are 3.7, 3.8, 3.9 on Linux, macOS, and Windows @@ -74,11 +81,11 @@ Changes constants used by the corresponding MD engine (issue #125, PR #129). This leads to small changes in results from GROMACS simulations in kT compared to the previous releases, with a relative difference on the order - of 1e-7. + of 1e-7. 
- Added unit-awareness: the base unit remains kT but dataframes need to carry the unit and temperature in a DataFrame.attrs so that dataframes can be easily converted to different energy units (issue #125) - - The parser outputs dataframe with metadata (issue #125, PR #129). + - The parser outputs dataframe with metadata (issue #125, PR #129). - Visualisation module will change the data according to input unit (issue #125, PR #129). - Bump the minimum pandas version to 1.2 (issue #130, #140). @@ -93,7 +100,7 @@ Fixes compatible with pandas.concat() (issue #150, PR #152). - Fix the support for pandas >= 1.3 by skipping 1.3.0 (issue #147, PR #148). - Fix separate_dhdl not work for multiple columns (issue #149, PR #151). - + 06/08/2021 orbeckst @@ -125,7 +132,7 @@ Changes * 0.4.0 Enhancements - - Allow the dhdl from TI estimator to be separated for multiple lambda + - Allow the dhdl from TI estimator to be separated for multiple lambda (PR #121). - Allow the convergence to be plotted. (PR #121) - Allow automatic sorting and duplication removal during subsampling @@ -199,4 +206,4 @@ Features: - Parsers for GROMACS, including reduced potentials and gradients. - Subsampler functions for slicing, statitistical inefficiency, equilibration detection. - Minimally functional estimators for MBAR, TI. 
- - high test coverage (works with data in alchemistry/alchemtests) + - high test coverage (works with data in alchemistry/alchemtests) \ No newline at end of file diff --git a/src/alchemlyb/tests/test_workflow.py b/src/alchemlyb/tests/test_workflow.py index 94e61cd2..a4308145 100644 --- a/src/alchemlyb/tests/test_workflow.py +++ b/src/alchemlyb/tests/test_workflow.py @@ -12,7 +12,7 @@ def workflow(tmp_path_factory): workflow.run() return workflow - def test_write(self, workflow, tmpdir): + def test_write(self, workflow): '''Patch the output directory to tmpdir''' workflow.result.to_pickle(os.path.join(workflow.out, 'result.pkl')) assert os.path.exists(os.path.join(workflow.out, 'result.pkl')) diff --git a/src/alchemlyb/workflows/base.py b/src/alchemlyb/workflows/base.py index 071e586e..a0af5f8b 100644 --- a/src/alchemlyb/workflows/base.py +++ b/src/alchemlyb/workflows/base.py @@ -123,4 +123,4 @@ def plot(self, *args, **kwargs): """ The function for producing any plots. """ - ... + pass From b8355bb5bb875f7f66fdd2f8db40c7c18d74582a Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 10 Apr 2022 10:33:11 +0100 Subject: [PATCH 072/123] Update CHANGES --- CHANGES | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 622dcf1d..9760c8fa 100644 --- a/CHANGES +++ b/CHANGES @@ -206,4 +206,4 @@ Features: - Parsers for GROMACS, including reduced potentials and gradients. - Subsampler functions for slicing, statitistical inefficiency, equilibration detection. - Minimally functional estimators for MBAR, TI. 
- - high test coverage (works with data in alchemistry/alchemtests) \ No newline at end of file + - high test coverage (works with data in alchemistry/alchemtests) From 36ca20714844ba2798bd375719be2894449b40f9 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 10 Apr 2022 10:52:33 +0100 Subject: [PATCH 073/123] update --- src/alchemlyb/tests/test_workflow_ABFE.py | 90 +++++++++++------------ src/alchemlyb/workflows/abfe.py | 8 +- 2 files changed, 49 insertions(+), 49 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index 54bdd106..804d0cc8 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -68,7 +68,7 @@ def test_check_convergence(self, workflow): assert os.path.isfile(os.path.join(workflow.out, 'dF_t.pdf')) assert len(workflow.convergence) == 10 -class Test_manual_ABFE(): +class Test_manual_ABFE(Test_automatic_ABFE): '''Test the manual workflow for load_ABFE from alchemtest.gmx for three stage transformation.''' @@ -88,50 +88,50 @@ def workflow(tmp_path_factory): workflow.plot_dF_state(dF_state='dF_state.pdf') workflow.check_convergence(10, dF_t='dF_t.pdf') return workflow - - def test_read(self, workflow): - '''test if the files has been loaded correctly.''' - assert len(workflow.u_nk_list) == 30 - assert len(workflow.dHdl_list) == 30 - assert all([len(u_nk) == 1001 for u_nk in workflow.u_nk_list]) - assert all([len(dHdl) == 1001 for dHdl in workflow.dHdl_list]) - - def test_subsample(self, workflow): - '''Test if the data has been shrinked by subsampling.''' - assert len(workflow.u_nk_sample_list) == 30 - assert len(workflow.dHdl_sample_list) == 30 - assert all([len(u_nk) < 1001 for u_nk in workflow.u_nk_sample_list]) - assert all([len(dHdl) < 1001 for dHdl in workflow.dHdl_sample_list]) - - def test_estimator(self, workflow): - '''Test if all three estimator has been used.''' - assert len(workflow.estimator) == 3 - assert 'mbar' in 
workflow.estimator - assert 'ti' in workflow.estimator - assert 'bar' in workflow.estimator - - def test_O_MBAR(self, workflow): - '''test if the O_MBAR.pdf has been plotted.''' - assert os.path.isfile(os.path.join(workflow.out, 'O_MBAR.pdf')) - - def test_dhdl_TI(self, workflow): - '''test if the dhdl_TI.pdf has been plotted.''' - assert os.path.isfile(os.path.join(workflow.out, 'dhdl_TI.pdf')) - - def test_dF_state(self, workflow): - '''test if the dF_state.pdf has been plotted.''' - assert os.path.isfile(os.path.join(workflow.out, 'dF_state.pdf')) - - def test_convergence(self, workflow): - '''test if the dF_state.pdf has been plotted.''' - assert os.path.isfile(os.path.join(workflow.out, 'dF_t.pdf')) - assert len(workflow.convergence) == 10 - - def test_convergence_TI(self, workflow): - '''test if the dF_state.pdf has been plotted.''' - workflow.check_convergence(10, estimator='ti', dF_t='test_dF_t.pdf') - assert os.path.isfile(os.path.join(workflow.out, 'test_dF_t.pdf')) - assert len(workflow.convergence) == 10 + # + # def test_read(self, workflow): + # '''test if the files has been loaded correctly.''' + # assert len(workflow.u_nk_list) == 30 + # assert len(workflow.dHdl_list) == 30 + # assert all([len(u_nk) == 1001 for u_nk in workflow.u_nk_list]) + # assert all([len(dHdl) == 1001 for dHdl in workflow.dHdl_list]) + # + # def test_subsample(self, workflow): + # '''Test if the data has been shrinked by subsampling.''' + # assert len(workflow.u_nk_sample_list) == 30 + # assert len(workflow.dHdl_sample_list) == 30 + # assert all([len(u_nk) < 1001 for u_nk in workflow.u_nk_sample_list]) + # assert all([len(dHdl) < 1001 for dHdl in workflow.dHdl_sample_list]) + # + # def test_estimator(self, workflow): + # '''Test if all three estimator has been used.''' + # assert len(workflow.estimator) == 3 + # assert 'mbar' in workflow.estimator + # assert 'ti' in workflow.estimator + # assert 'bar' in workflow.estimator + # + # def test_O_MBAR(self, workflow): + # '''test if 
the O_MBAR.pdf has been plotted.''' + # assert os.path.isfile(os.path.join(workflow.out, 'O_MBAR.pdf')) + # + # def test_dhdl_TI(self, workflow): + # '''test if the dhdl_TI.pdf has been plotted.''' + # assert os.path.isfile(os.path.join(workflow.out, 'dhdl_TI.pdf')) + # + # def test_dF_state(self, workflow): + # '''test if the dF_state.pdf has been plotted.''' + # assert os.path.isfile(os.path.join(workflow.out, 'dF_state.pdf')) + # + # def test_convergence(self, workflow): + # '''test if the dF_state.pdf has been plotted.''' + # assert os.path.isfile(os.path.join(workflow.out, 'dF_t.pdf')) + # assert len(workflow.convergence) == 10 + # + # def test_convergence_TI(self, workflow): + # '''test if the dF_state.pdf has been plotted.''' + # workflow.check_convergence(10, estimator='ti', dF_t='test_dF_t.pdf') + # assert os.path.isfile(os.path.join(workflow.out, 'test_dF_t.pdf')) + # assert len(workflow.convergence) == 10 class Test_automatic_benzene(): diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index b1d6568d..9dca623d 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -111,7 +111,6 @@ def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, raise NameError('{} parser not found.'.format(software)) def read(self): - u_nk_list = [] dHdl_list = [] for xvg in self.file_list: @@ -179,7 +178,8 @@ def run(self, skiptime=0, uncorr=None, threshold=50, methods=None, plt.close(fig) if forwrev is not None: - ax = self.check_convergence(forwrev, estimator='mbar', dF_t='dF_t.pdf') + ax = self.check_convergence(forwrev, estimator='autombar', + dF_t='dF_t.pdf') plt.close(ax.figure) @@ -580,7 +580,7 @@ def plot_dF_state(self, dF_state='dF_state.pdf', labels=None, colors=None, ''.format(dF_state, self.out)) return fig - def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', + def check_convergence(self, forwrev, estimator='autombar', dF_t='dF_t.pdf', ax=None): '''Compute the forward and 
backward convergence using :func:`~alchemlyb.convergence.forward_backward_convergence`and @@ -594,7 +594,7 @@ def check_convergence(self, forwrev, estimator='mbar', dF_t='dF_t.pdf', directions, with the specified number of points in the time plot. The number of time points (an integer) must be provided. estimator : str - The estimator used for convergence analysis. Default: 'mbar' + The estimator used for convergence analysis. Default: 'autombar' dF_t : str The filename for the plot of convergence. Default: 'dF_t.pdf' ax : matplotlib.axes.Axes From 02e95206967ac309bc6de2f280f305345e301ae1 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 10 Apr 2022 17:22:45 +0100 Subject: [PATCH 074/123] update --- src/alchemlyb/tests/test_workflow_ABFE.py | 111 +++++++++------------- src/alchemlyb/workflows/abfe.py | 24 +++-- 2 files changed, 58 insertions(+), 77 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index 804d0cc8..e667738a 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -1,7 +1,6 @@ import numpy as np import pytest import os -from unittest.mock import patch from alchemlyb.workflows.abfe import ABFE from alchemtest.gmx import load_ABFE, load_benzene @@ -37,7 +36,7 @@ def test_subsample(self, workflow): assert all([len(dHdl) < 1001 for dHdl in workflow.dHdl_sample_list]) def test_estimator(self, workflow): - '''Test if all three estimator has been used.''' + '''Test if all three estimators have been used.''' assert len(workflow.estimator) == 3 assert 'mbar' in workflow.estimator assert 'ti' in workflow.estimator @@ -50,15 +49,15 @@ def test_summary(self, workflow): assert np.isclose(summary['TI']['Stages']['TOTAL'], 21.8, 0.1) assert np.isclose(summary['BAR']['Stages']['TOTAL'], 21.8, 0.1) - def test_O_MBAR(self, workflow): + def test_plot_O_MBAR(self, workflow): '''test if the O_MBAR.pdf has been plotted.''' assert 
os.path.isfile(os.path.join(workflow.out, 'O_MBAR.pdf')) - def test_dhdl_TI(self, workflow): + def test_plot_dhdl_TI(self, workflow): '''test if the dhdl_TI.pdf has been plotted.''' assert os.path.isfile(os.path.join(workflow.out, 'dhdl_TI.pdf')) - def test_dF_state(self, workflow): + def test_plot_dF_state(self, workflow): '''test if the dF_state.pdf has been plotted.''' assert os.path.isfile(os.path.join(workflow.out, 'dF_state.pdf')) assert os.path.isfile(os.path.join(workflow.out, 'dF_state_long.pdf')) @@ -88,51 +87,27 @@ def workflow(tmp_path_factory): workflow.plot_dF_state(dF_state='dF_state.pdf') workflow.check_convergence(10, dF_t='dF_t.pdf') return workflow - # - # def test_read(self, workflow): - # '''test if the files has been loaded correctly.''' - # assert len(workflow.u_nk_list) == 30 - # assert len(workflow.dHdl_list) == 30 - # assert all([len(u_nk) == 1001 for u_nk in workflow.u_nk_list]) - # assert all([len(dHdl) == 1001 for dHdl in workflow.dHdl_list]) - # - # def test_subsample(self, workflow): - # '''Test if the data has been shrinked by subsampling.''' - # assert len(workflow.u_nk_sample_list) == 30 - # assert len(workflow.dHdl_sample_list) == 30 - # assert all([len(u_nk) < 1001 for u_nk in workflow.u_nk_sample_list]) - # assert all([len(dHdl) < 1001 for dHdl in workflow.dHdl_sample_list]) - # - # def test_estimator(self, workflow): - # '''Test if all three estimator has been used.''' - # assert len(workflow.estimator) == 3 - # assert 'mbar' in workflow.estimator - # assert 'ti' in workflow.estimator - # assert 'bar' in workflow.estimator - # - # def test_O_MBAR(self, workflow): - # '''test if the O_MBAR.pdf has been plotted.''' - # assert os.path.isfile(os.path.join(workflow.out, 'O_MBAR.pdf')) - # - # def test_dhdl_TI(self, workflow): - # '''test if the dhdl_TI.pdf has been plotted.''' - # assert os.path.isfile(os.path.join(workflow.out, 'dhdl_TI.pdf')) - # - # def test_dF_state(self, workflow): - # '''test if the dF_state.pdf has been 
plotted.''' - # assert os.path.isfile(os.path.join(workflow.out, 'dF_state.pdf')) - # - # def test_convergence(self, workflow): - # '''test if the dF_state.pdf has been plotted.''' - # assert os.path.isfile(os.path.join(workflow.out, 'dF_t.pdf')) - # assert len(workflow.convergence) == 10 - # - # def test_convergence_TI(self, workflow): - # '''test if the dF_state.pdf has been plotted.''' - # workflow.check_convergence(10, estimator='ti', dF_t='test_dF_t.pdf') - # assert os.path.isfile(os.path.join(workflow.out, 'test_dF_t.pdf')) - # assert len(workflow.convergence) == 10 + def test_plot_dF_state(self, workflow): + '''test if the dF_state.pdf has been plotted.''' + assert os.path.isfile(os.path.join(workflow.out, 'dF_state.pdf')) + + def test_convergence_nosample_u_nk(self, workflow, monkeypatch): + '''test if the convergence routine would use the unsampled data + when the data has not been subsampled.''' + monkeypatch.setattr(workflow, 'u_nk_sample_list', + None) + workflow.check_convergence(10) + assert len(workflow.convergence) == 10 + + def test_dhdl_TI_noTI(self, workflow, monkeypatch): + '''Test to plot the dhdl_TI when ti estimator is not there''' + no_TI = workflow.estimator + no_TI.pop('ti') + monkeypatch.setattr(workflow, 'estimator', + no_TI) + with pytest.raises(ValueError): + workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') class Test_automatic_benzene(): '''Test the full automatic workflow for load_benzene from alchemtest.gmx for @@ -187,15 +162,15 @@ def test_convergence(self, workflow): class Test_unpertubed_lambda(): '''Test the if two lamdas present and one of them is not pertubed. 
- fep bound -time fep-lambda bound-lambda -0.0 0.5 0 12.958159 0 -10.0 0.5 0 -1.062968 0 -20.0 0.5 0 1.019020 0 -30.0 0.5 0 5.029051 0 -40.0 0.5 0 7.768072 0 + fep bound + time fep-lambda bound-lambda + 0.0 0.5 0 12.958159 0 + 10.0 0.5 0 -1.062968 0 + 20.0 0.5 0 1.019020 0 + 30.0 0.5 0 5.029051 0 + 40.0 0.5 0 7.768072 0 -Where only fep-lambda changes but the bonded-lambda is always 0. + Where only fep-lambda changes but the bonded-lambda is always 0. ''' @staticmethod @@ -235,7 +210,7 @@ def test_convergence(self, workflow): assert os.path.isfile(os.path.join(workflow.out, 'dF_t.pdf')) assert len(workflow.convergence) == 10 - def test_single_estimator_mbar(self, workflow): + def test_single_estimator_ti(self, workflow): workflow.estimate(methods='ti') summary = workflow.generate_result() assert np.isclose(summary['TI']['Stages']['TOTAL'], 2.946, 0.1) @@ -245,19 +220,22 @@ class Test_methods(): @staticmethod @pytest.fixture(scope='class') - def workflow(): + def workflow(tmp_path_factory): + outdir = tmp_path_factory.mktemp("out") dir = os.path.dirname(os.path.dirname( load_benzene()['data']['Coulomb'][0])) dir = os.path.join(dir, '*') workflow = ABFE(software='Gromacs', dir=dir, prefix='dhdl', - suffix='bz2', T=310) + suffix='bz2', T=310, out=outdir) workflow.read() return workflow def test_uncorr_threshold(self, workflow, monkeypatch): - monkeypatch.setattr(workflow.u_nk_list, + '''Test if the full data will be used when the number of data points + are less than the threshold.''' + monkeypatch.setattr(workflow, 'u_nk_list', [u_nk[:40] for u_nk in workflow.u_nk_list]) - monkeypatch.setattr(workflow.dHdl_list, + monkeypatch.setattr(workflow, 'dHdl_list', [dHdl[:40] for dHdl in workflow.dHdl_list]) workflow.preprocess(threshold=50) assert all([len(u_nk) == 40 for u_nk in workflow.u_nk_sample_list]) @@ -274,15 +252,14 @@ def test_single_estimator_ti(self, workflow): workflow.estimate(methods='ti') summary = workflow.generate_result() assert 
np.isclose(summary['TI']['Stages']['TOTAL'], 2.946, 0.1) - workflow.u_nk_list = u_nk_list def test_bar_convergence(self, workflow): workflow.check_convergence(10, estimator='bar') - assert os.path.isfile('dF_t.pdf') - os.remove('dF_t.pdf') + assert os.path.isfile(os.path.join(workflow.out, 'dF_t.pdf')) - def test_unprocessed_n_uk(self, workflow): - workflow.u_nk_sample_list = [] + def test_unprocessed_n_uk(self, workflow, monkeypatch): + monkeypatch.setattr(workflow, 'u_nk_sample_list', + None) workflow.estimate() assert len(workflow.estimator) == 3 assert 'mbar' in workflow.estimator diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 9dca623d..f312cefe 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -28,7 +28,7 @@ class ABFE(WorkflowBase): The unit used for printing and plotting results. {'kcal/mol', 'kJ/mol', 'kT'}. Default: 'kT'. software : str - The software used for generating input. {'Gromacs', } + The software used for generating input. {'GROMACS', } dir : str Directory in which data files are stored. Default: os.path.curdir. prefix : str @@ -152,6 +152,8 @@ def read(self): self.file_list))) self.u_nk_list = [u_nk_list[i] for i in index_list] self.dHdl_list = [dHdl_list[i] for i in index_list] + self.u_nk_sample_list = None + self.dHdl_sample_list = None def run(self, skiptime=0, uncorr=None, threshold=50, methods=None, overlap=None, breakdown=None, forwrev=None, *args, **kwargs): @@ -294,18 +296,18 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): self.estimator = {} # Use unprocessed data if preprocess is not performed. 
if 'ti' in methods: - try: + if self.dHdl_sample_list is not None: dHdl = concat(self.dHdl_sample_list) - except (AttributeError, ValueError): + else: dHdl = concat(self.dHdl_list) self.logger.warning('dHdl has not been preprocessed.') self.logger.info( 'A total {} lines of dHdl is used.'.format(len(dHdl))) if 'bar' in methods or 'mbar' in methods: - try: + if self.u_nk_sample_list is not None: u_nk = concat(self.u_nk_sample_list) - except (AttributeError, ValueError): + else: u_nk = concat(self.u_nk_list) self.logger.warning('u_nk has not been preprocessed.') self.logger.info( @@ -547,6 +549,8 @@ def plot_ti_dhdl(self, dhdl_TI='dhdl_TI.pdf', labels=None, colors=None, self.logger.info('Plot TI dHdl to {} under {}.' ''.format(dhdl_TI, self.out)) return ax + else: + raise ValueError('No TI data available in estimators.') def plot_dF_state(self, dF_state='dF_state.pdf', labels=None, colors=None, orientation='portrait', nb=10): @@ -613,11 +617,11 @@ def check_convergence(self, forwrev, estimator='autombar', dF_t='dF_t.pdf', self.logger.info('Start convergence analysis.') self.logger.info('Check data availability.') - if estimator.lower() in ['mbar', 'bar']: - try: + if estimator.lower() in ['mbar', 'bar', 'autombar']: + if self.u_nk_sample_list is not None: u_nk_list = self.u_nk_sample_list self.logger.info('Subsampled u_nk is available.') - except AttributeError: + else: try: u_nk_list = self.u_nk_list self.logger.info('Subsampled u_nk not available, ' @@ -628,10 +632,10 @@ def check_convergence(self, forwrev, estimator='autombar', dF_t='dF_t.pdf', estimator=estimator, num=forwrev) else: - try: + if self.dHdl_sample_list is not None: dHdl_list = self.dHdl_sample_list self.logger.info('Subsampled dHdl is available.') - except AttributeError: + else: try: dHdl_list = self.dHdl_list self.logger.info('Subsampled dHdl not available, ' From b58d11760ee7435ef5a0494c0c3101dce367662d Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 10 Apr 2022 17:44:59 +0100 Subject: [PATCH 
075/123] update --- src/alchemlyb/tests/test_workflow_ABFE.py | 6 ++++++ src/alchemlyb/workflows/abfe.py | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index e667738a..10f48493 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -263,3 +263,9 @@ def test_unprocessed_n_uk(self, workflow, monkeypatch): workflow.estimate() assert len(workflow.estimator) == 3 assert 'mbar' in workflow.estimator + + def test_unprocessed_dhdl(self, workflow, monkeypatch): + monkeypatch.setattr(workflow, 'dHdl_sample_list', + None) + workflow.check_convergence(10, estimator='ti') + assert len(workflow.convergence) == 10 diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index f312cefe..ac9b8d16 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -622,11 +622,11 @@ def check_convergence(self, forwrev, estimator='autombar', dF_t='dF_t.pdf', u_nk_list = self.u_nk_sample_list self.logger.info('Subsampled u_nk is available.') else: - try: + if self.u_nk_list is not None: u_nk_list = self.u_nk_list self.logger.info('Subsampled u_nk not available, ' 'use original data instead.') - except AttributeError: # pragma: no cover + else: # pragma: no cover self.logger.warning('u_nk is not available.') convergence = forward_backward_convergence(u_nk_list, estimator=estimator, @@ -636,11 +636,11 @@ def check_convergence(self, forwrev, estimator='autombar', dF_t='dF_t.pdf', dHdl_list = self.dHdl_sample_list self.logger.info('Subsampled dHdl is available.') else: - try: + if self.dHdl_list is not None: dHdl_list = self.dHdl_list self.logger.info('Subsampled dHdl not available, ' 'use original data instead.') - except AttributeError: # pragma: no cover + else: # pragma: no cover self.logger.warning('dHdl is not available.') convergence = forward_backward_convergence(dHdl_list, 
estimator=estimator, From 58f52e76a9d82c6f9946915bd5e029f4e90536cf Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 10 Apr 2022 19:55:18 +0100 Subject: [PATCH 076/123] update --- src/alchemlyb/tests/test_workflow_ABFE.py | 21 +++++++++++++++++++++ src/alchemlyb/workflows/abfe.py | 5 ++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index 10f48493..16544978 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -4,6 +4,7 @@ from alchemlyb.workflows.abfe import ABFE from alchemtest.gmx import load_ABFE, load_benzene +from alchemtest.amber import load_bace_example class Test_automatic_ABFE(): '''Test the full automatic workflow for load_ABFE from alchemtest.gmx for @@ -269,3 +270,23 @@ def test_unprocessed_dhdl(self, workflow, monkeypatch): None) workflow.check_convergence(10, estimator='ti') assert len(workflow.convergence) == 10 + +class Test_automatic_amber(): + '''Test the full automatic workflow for load_ABFE from alchemtest.gmx for + three stage transformation.''' + + @staticmethod + @pytest.fixture(scope='session') + def workflow(tmp_path_factory): + outdir = tmp_path_factory.mktemp("out") + dir = os.path.dirname(load_bace_example()['data']['complex']['vdw'][0]) + workflow = ABFE(units='kcal/mol', software='Amber', dir=dir, + prefix='ti', suffix='bz2', T=310, out=str(outdir)) + workflow.read() + workflow.estimate(methods='ti') + return workflow + + def test_summary(self, workflow): + '''Test if if the summary is right.''' + summary = workflow.generate_result() + assert np.isclose(summary['TI']['Stages']['TOTAL'], 0.0, 0.1) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index ac9b8d16..32bf6425 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -7,7 +7,7 @@ import matplotlib.pyplot as plt from .base import WorkflowBase -from ..parsing import 
gmx +from ..parsing import gmx, amber from ..preprocessing.subsampling import decorrelate_dhdl, decorrelate_u_nk from ..estimators import BAR, TI from ..estimators import AutoMBAR as MBAR @@ -107,6 +107,9 @@ def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, software)) self._extract_u_nk = gmx.extract_u_nk self._extract_dHdl = gmx.extract_dHdl + elif software.lower() == 'amber': + self._extract_u_nk = amber.extract_u_nk + self._extract_dHdl = amber.extract_dHdl else: # pragma: no cover raise NameError('{} parser not found.'.format(software)) From a71d32f7e43aa3a36a67ebb05dd89cda41710061 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 10 Apr 2022 20:08:13 +0100 Subject: [PATCH 077/123] update --- src/alchemlyb/tests/test_workflow_ABFE.py | 6 ++++-- src/alchemlyb/workflows/abfe.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index 16544978..b43e1daf 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -279,7 +279,9 @@ class Test_automatic_amber(): @pytest.fixture(scope='session') def workflow(tmp_path_factory): outdir = tmp_path_factory.mktemp("out") - dir = os.path.dirname(load_bace_example()['data']['complex']['vdw'][0]) + dir, _ = os.path.split( + os.path.dirname(load_bace_example()['data']['complex']['vdw'][0])) + workflow = ABFE(units='kcal/mol', software='Amber', dir=dir, prefix='ti', suffix='bz2', T=310, out=str(outdir)) workflow.read() @@ -289,4 +291,4 @@ def workflow(tmp_path_factory): def test_summary(self, workflow): '''Test if if the summary is right.''' summary = workflow.generate_result() - assert np.isclose(summary['TI']['Stages']['TOTAL'], 0.0, 0.1) + assert np.isclose(summary['TI']['Stages']['TOTAL'], 1.40405980473, 0.1) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 32bf6425..609c9b1f 100644 --- a/src/alchemlyb/workflows/abfe.py 
+++ b/src/alchemlyb/workflows/abfe.py @@ -96,7 +96,8 @@ def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, self.logger.info('Finding files with prefix: {}, suffix: {} under ' 'directory {} produced by {}'.format(prefix, suffix, dir, software)) - self.file_list = glob(join(dir, prefix + '*' + suffix)) + self.file_list = glob(dir + '/**/' + prefix + '*' + suffix, \ + recursive=True) self.logger.info('Found {} xvg files.'.format(len(self.file_list))) self.logger.info('Unsorted file list: \n{}'.format('\n'.join( From 3dbc165a71c0b1c283c49c32367db52bfef34032 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 1 May 2022 14:38:53 +0100 Subject: [PATCH 078/123] update --- src/alchemlyb/tests/test_workflow_ABFE.py | 4 ++++ src/alchemlyb/workflows/abfe.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index b43e1daf..e3851deb 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -258,6 +258,10 @@ def test_bar_convergence(self, workflow): workflow.check_convergence(10, estimator='bar') assert os.path.isfile(os.path.join(workflow.out, 'dF_t.pdf')) + def test_ti_convergence(self, workflow): + workflow.check_convergence(10, estimator='ti') + assert os.path.isfile(os.path.join(workflow.out, 'dF_t.pdf')) + def test_unprocessed_n_uk(self, workflow, monkeypatch): monkeypatch.setattr(workflow, 'u_nk_sample_list', None) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 609c9b1f..901b04d4 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -143,7 +143,7 @@ def read(self): index_list = sorted(range(len(self.file_list)), key=lambda x:column_names.index( u_nk_list[x].reset_index('time').index.values[0])) - else: + else: # pragma: no cover self.logger.info('Sort files according to the dHdl.') column_names = 
sorted([dHdl.reset_index('time').index.values[0] for dHdl in dHdl_list]) From f0450174d51c4a1a8820c419f0286d3d1c96e84c Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 1 May 2022 14:50:25 +0100 Subject: [PATCH 079/123] update --- src/alchemlyb/tests/test_workflow_ABFE.py | 5 +++++ src/alchemlyb/workflows/abfe.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index e3851deb..c77be8ae 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -231,6 +231,11 @@ def workflow(tmp_path_factory): workflow.read() return workflow + def test_run_none(self, workflow): + '''Don't run anything''' + workflow.run(uncorr=None, methods=None, overlap=None, breakdown=None, + forwrev=None) + def test_uncorr_threshold(self, workflow, monkeypatch): '''Test if the full data will be used when the number of data points are less than the threshold.''' diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 901b04d4..609c9b1f 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -143,7 +143,7 @@ def read(self): index_list = sorted(range(len(self.file_list)), key=lambda x:column_names.index( u_nk_list[x].reset_index('time').index.values[0])) - else: # pragma: no cover + else: self.logger.info('Sort files according to the dHdl.') column_names = sorted([dHdl.reset_index('time').index.values[0] for dHdl in dHdl_list]) From b5a27a340300fbfbec51ded3d766a792ac603869 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 15 May 2022 10:26:00 +0100 Subject: [PATCH 080/123] update --- docs/workflows.rst | 1 + docs/workflows/alchemlyb.workflows.abfe.rst | 113 ++++++++++++++++++++ src/alchemlyb/tests/test_workflow_ABFE.py | 4 +- src/alchemlyb/workflows/__init__.py | 2 +- src/alchemlyb/workflows/abfe.py | 85 +++++++++------ src/alchemlyb/workflows/base.py | 1 + 6 files changed, 168 
insertions(+), 38 deletions(-) create mode 100644 docs/workflows/alchemlyb.workflows.abfe.rst diff --git a/docs/workflows.rst b/docs/workflows.rst index 3296edae..6ca34b1f 100644 --- a/docs/workflows.rst +++ b/docs/workflows.rst @@ -13,4 +13,5 @@ For developers, the skeleton of the workflow should follow the example in :toctree: workflows base + ABFE diff --git a/docs/workflows/alchemlyb.workflows.abfe.rst b/docs/workflows/alchemlyb.workflows.abfe.rst new file mode 100644 index 00000000..dbbfd4a8 --- /dev/null +++ b/docs/workflows/alchemlyb.workflows.abfe.rst @@ -0,0 +1,113 @@ +The ABFE workflow +================== +Though **alchemlyb** is a library offering great flexibility in deriving free +energy estimates, it also provides an easy pipeline that is similar to +`Alchemical Analysis `_ and a +step-by-step version that allows more flexibility. + +Fully Automatic analysis +------------------------ +An interface similar to +`Alchemical Analysis `_ +could be executed with two lines of command. :: + + >>> import os + >>> from alchemtest.gmx import load_ABFE + >>> from alchemlyb.workflows import ABFE + >>> # Obtain the path of the data + >>> dir = os.path.dirname(load_ABFE()['data']['complex'][0]) + >>> print(dir) + 'alchemtest/gmx/ABFE/complex' + >>> workflow = ABFE(units='kcal/mol', software='Gromacs', dir=dir, + >>> prefix='dhdl', suffix='xvg', T=298, out='./') + >>> workflow.run(skiptime=10, uncorr='dhdl', threshold=50, + >>> methods=('mbar', 'bar', 'ti'), overlap='O_MBAR.pdf', + >>> breakdown=True, forwrev=10) + +This would give the free energy estimate using all of +:class:`~alchemlyb.estimators.TI`, :class:`~alchemlyb.estimators.BAR`, +:class:`~alchemlyb.estimators.MBAR` and the result will be stored in +:attr:`~alchemlyb.workflows.ABFE.summary` as :class:`pandas.DataFrame`.
:: + + + MBAR MBAR_Error BAR BAR_Error TI TI_Error + States 0 -- 1 0.065967 0.001293 0.066544 0.001661 0.066663 0.001675 + 1 -- 2 0.089774 0.001398 0.089303 0.002101 0.089566 0.002144 + 2 -- 3 0.132036 0.001638 0.132687 0.002990 0.133292 0.003055 + ... + 26 -- 27 1.243745 0.011239 1.245873 0.015711 1.248959 0.015762 + 27 -- 28 1.128429 0.012859 1.124554 0.016999 1.121892 0.016962 + 28 -- 29 1.010313 0.016442 1.005444 0.017692 1.019747 0.017257 + Stages coul 10.215658 0.033903 10.017838 0.041839 10.017854 0.048744 + vdw 22.547489 0.098699 22.501150 0.060092 22.542936 0.106723 + bonded 2.374144 0.014995 2.341631 0.005507 2.363828 0.021078 + TOTAL 35.137291 0.103580 34.860619 0.087022 34.924618 0.119206 + +The :ref:`overlap matrix for the MBAR estimator ` will be +plotted and saved to `O_MBAR.pdf`. + +The :ref:`dHdl for TI ` will be plotted to `dhdl_TI.pdf`. + +The :ref:`dF states ` will be plotted to `dF_state.pdf` in +portrait mode and `dF_state_long.pdf` in landscape mode. + +The forward and backward convergence will be plotted to `dF_t.pdf` using +:class:`~alchemlyb.estimators.MBAR` and saved in +:attr:`~alchemlyb.workflows.ABFE.convergence`. + +.. currentmodule:: alchemlyb.workflows + +.. autoclass:: ABFE + :no-index: + :members: + .. automethod:: run + + + + + +Semi-automatic analysis +----------------------- +The same analysis could also be performed in steps allowing access and modification +to the data generated at each stage of the analysis. :: + + >>> import os + >>> from alchemtest.gmx import load_ABFE + >>> from alchemlyb.workflows import ABFE + >>> # Obtain the path of the data + >>> dir = os.path.dirname(load_ABFE()['data']['complex'][0]) + >>> print(dir) + 'alchemtest/gmx/ABFE/complex' + >>> # Load the data + >>> workflow = ABFE(software='Gromacs', dir=dir, + >>> prefix='dhdl', suffix='xvg', T=298, out='./') + >>> # Set the unit. + >>> workflow.update_units('kcal/mol') + >>> # Read the data + >>> workflow.read() + >>> # Decorrelate the data.
+ >>> workflow.preprocess(skiptime=10, uncorr='dhdl', threshold=50) + >>> # Run the estimator + >>> workflow.estimate(methods=('mbar', 'bar', 'ti')) + >>> # Retrieve the result + >>> summary = workflow.generate_result() + >>> # Plot the overlap matrix + >>> workflow.plot_overlap_matrix(overlap='O_MBAR.pdf') + >>> # Plot the dHdl for TI + >>> workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') + >>> # Plot the dF states + >>> workflow.plot_dF_state(dF_state='dF_state.pdf') + >>> # Convergence analysis + >>> workflow.check_convergence(10, dF_t='dF_t.pdf') + +.. currentmodule:: alchemlyb.workflows.ABFE + +.. autofunction:: update_units +.. autofunction:: read +.. autofunction:: preprocess +.. autofunction:: estimate +.. autofunction:: generate_result +.. autofunction:: plot_overlap_matrix +.. autofunction:: plot_ti_dhdl +.. autofunction:: plot_dF_state +.. autofunction:: check_convergence \ No newline at end of file diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index c77be8ae..e0d352bf 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -261,11 +261,11 @@ def test_single_estimator_ti(self, workflow): def test_bar_convergence(self, workflow): workflow.check_convergence(10, estimator='bar') - assert os.path.isfile(os.path.join(workflow.out, 'dF_t.pdf')) + assert len(workflow.convergence) == 10 def test_ti_convergence(self, workflow): workflow.check_convergence(10, estimator='ti') - assert os.path.isfile(os.path.join(workflow.out, 'dF_t.pdf')) + assert len(workflow.convergence) == 10 def test_unprocessed_n_uk(self, workflow, monkeypatch): monkeypatch.setattr(workflow, 'u_nk_sample_list', diff --git a/src/alchemlyb/workflows/__init__.py b/src/alchemlyb/workflows/__init__.py index 1fc942bb..6b35d460 100644 --- a/src/alchemlyb/workflows/__init__.py +++ b/src/alchemlyb/workflows/__init__.py @@ -1,4 +1,4 @@ __all__ = [ 'base', - 'abfe', ] +from .abfe import ABFE diff --git 
a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 609c9b1f..58fe9cb9 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -28,7 +28,7 @@ class ABFE(WorkflowBase): The unit used for printing and plotting results. {'kcal/mol', 'kJ/mol', 'kT'}. Default: 'kT'. software : str - The software used for generating input. {'GROMACS', } + The software used for generating input. {'GROMACS', 'AMBER'} dir : str Directory in which data files are stored. Default: os.path.curdir. prefix : str @@ -37,38 +37,9 @@ class ABFE(WorkflowBase): Suffix for datafile sets. Default: 'xvg'. T : float Temperature in K. Default: 298. - skiptime : float - Discard data prior to this specified time as 'equilibration' data. Units - picoseconds. Default: 0. - uncorr : str - The observable to be used for the autocorrelation analysis; 'dhdl' - (obtained as a sum over those energy components that are changing). - Default: `dhdl` - threshold : int - Proceed with correlated samples if the number of uncorrelated samples is - found to be less than this number. If 0 is given, the time series - analysis will not be performed at all. Default: 50. - methods : str - A list of the methods to esitimate the free energy with. Default: None. out : str Directory in which the output files produced by this script will be stored. Default: os.path.curdir. - overlap : str - The filename for the plot of overlap matrix. Default: None. (not - plotting). - breakdown : bool - Plot the free energy differences evaluated for each pair of adjacent - states for all methods, including the dH/dlambda curve for TI. Default: - None. (not plotting). - forwrev : int - Plot the free energy change as a function of time in both directions, - with the specified number of points in the time plot. The number of time - points (an integer) must be provided. Default: None. (not doing - convergence analysis). - log : str - The filename of the log file. 
The workflow logs under - alchemlyb.workflows.ABFE. Default: - 'result.log' Attributes ---------- @@ -76,10 +47,6 @@ class ABFE(WorkflowBase): The logging object. file_list : list The list of filenames sorted by the lambda state. - u_nk_list : list - The list of u_nk read from the files. - dHdl_list : list - The list of dHdl read from the files. ''' def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, prefix='dhdl', suffix='xvg', T=298, out=os.path.curdir): @@ -96,7 +63,7 @@ def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, self.logger.info('Finding files with prefix: {}, suffix: {} under ' 'directory {} produced by {}'.format(prefix, suffix, dir, software)) - self.file_list = glob(dir + '/**/' + prefix + '*' + suffix, \ + self.file_list = glob(dir + '/**/' + prefix + '*' + suffix, recursive=True) self.logger.info('Found {} xvg files.'.format(len(self.file_list))) @@ -115,6 +82,16 @@ def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, raise NameError('{} parser not found.'.format(software)) def read(self): + '''Read the u_nk and dHdL data from the + :attr:`~alchemlyb.workflows.ABFE.file_list` + + Attributes + ---------- + u_nk_list : list + A list of :class:`pandas.DataFrame` of u_nk. + dHdl_list : list + A list of :class:`pandas.DataFrame` of dHdl. + ''' u_nk_list = [] dHdl_list = [] for xvg in self.file_list: @@ -161,6 +138,44 @@ def read(self): def run(self, skiptime=0, uncorr=None, threshold=50, methods=None, overlap=None, breakdown=None, forwrev=None, *args, **kwargs): + ''' The method for running the automatic analysis. + + Parameters + ---------- + skiptime : float + Discard data prior to this specified time as 'equilibration' data. Units + picoseconds. Default: 0. + uncorr : str + The observable to be used for the autocorrelation analysis; 'dhdl' + (obtained as a sum over those energy components that are changing). 
+ Default: `dhdl` + threshold : int + Proceed with correlated samples if the number of uncorrelated samples is + found to be less than this number. If 0 is given, the time series + analysis will not be performed at all. Default: 50. + methods : str + A list of the methods to esitimate the free energy with. Default: None. + + overlap : str + The filename for the plot of overlap matrix. Default: None. (not + plotting). + breakdown : bool + Plot the free energy differences evaluated for each pair of adjacent + states for all methods, including the dH/dlambda curve for TI. Default: + None. (not plotting). + forwrev : int + Plot the free energy change as a function of time in both directions, + with the specified number of points in the time plot. The number of time + points (an integer) must be provided. Default: None. (not doing + convergence analysis). + + Attributes + ---------- + summary : Dataframe + The summary of the free energy estimate. + convergence : DataFrame + The summary of the convergence results. 
+ ''' self.read() if uncorr is not None: diff --git a/src/alchemlyb/workflows/base.py b/src/alchemlyb/workflows/base.py index 83fcefb4..e3d76f07 100644 --- a/src/alchemlyb/workflows/base.py +++ b/src/alchemlyb/workflows/base.py @@ -37,6 +37,7 @@ class WorkflowBase(): """ def __init__(self, units='kT', software='Gromacs', T=298, out='./', *args, **kwargs): + self.T = T self.software = software self.unit = units From 87c473e72f63f7b68b127e53d6445c64d6c03df5 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 15 May 2022 10:41:43 +0100 Subject: [PATCH 081/123] update --- docs/workflows.rst | 5 ++++ ....abfe.rst => alchemlyb.workflows.ABFE.rst} | 27 ++++--------------- src/alchemlyb/workflows/abfe.py | 20 +++++++------- src/alchemlyb/workflows/base.py | 2 +- 4 files changed, 21 insertions(+), 33 deletions(-) rename docs/workflows/{alchemlyb.workflows.abfe.rst => alchemlyb.workflows.ABFE.rst} (90%) diff --git a/docs/workflows.rst b/docs/workflows.rst index 6ca34b1f..c8ecfc78 100644 --- a/docs/workflows.rst +++ b/docs/workflows.rst @@ -7,6 +7,11 @@ of the results and step-by-step version that allows more flexibility. For developers, the skeleton of the workflow should follow the example in :class:`alchemlyb.workflows.base.WorkflowBase`. +For users, **alchemlyb** offered a workflow :class:`alchemlyb.workflows.ABFE` +similar to +`Alchemical Analysis `_ +for doing automatic ABFE analysis. + .. currentmodule:: alchemlyb.workflows .. 
autosummary:: diff --git a/docs/workflows/alchemlyb.workflows.abfe.rst b/docs/workflows/alchemlyb.workflows.ABFE.rst similarity index 90% rename from docs/workflows/alchemlyb.workflows.abfe.rst rename to docs/workflows/alchemlyb.workflows.ABFE.rst index dbbfd4a8..47f125fa 100644 --- a/docs/workflows/alchemlyb.workflows.abfe.rst +++ b/docs/workflows/alchemlyb.workflows.ABFE.rst @@ -55,17 +55,6 @@ The forward and backward convergence will be plotted to `dF_t.pdf` using :class:`~alchemlyb.estimators.MBAR` and save in :attr:`~alchemlyb.workflows.ABFE.convergence`. -.. currentmodule:: alchemlyb.workflows - -.. autoclass:: ABFE - :no-index: - :members: - .. automethod:: run - - - - - Semi-automatic analysis ----------------------- The same analysis could also performed in steps allowing access and modification @@ -100,14 +89,8 @@ to the data generated at each stage of the analysis. :: >>> # Convergence analysis >>> workflow.check_convergence(10, dF_t='dF_t.pdf') -.. currentmodule:: alchemlyb.workflows.ABFE - -.. autofunction:: update_units -.. autofunction:: read -.. autofunction:: preprocess -.. autofunction:: estimate -.. autofunction:: generate_result -.. autofunction:: plot_overlap_matrix -.. autofunction:: plot_ti_dhdl -.. autofunction:: plot_dF_state -.. autofunction:: check_convergence \ No newline at end of file +API Reference +------------- +.. 
autoclass:: alchemlyb.workflows.ABFE + :members: + :inherited-members: \ No newline at end of file diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 58fe9cb9..5221f05f 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -136,8 +136,9 @@ def read(self): self.u_nk_sample_list = None self.dHdl_sample_list = None - def run(self, skiptime=0, uncorr=None, threshold=50, methods=None, - overlap=None, breakdown=None, forwrev=None, *args, **kwargs): + def run(self, skiptime=0, uncorr='dhdl', threshold=50, + methods=('mbar', 'bar', 'ti'), overlap='O_MBAR.pdf', + breakdown=True, forwrev=10, *args, **kwargs): ''' The method for running the automatic analysis. Parameters @@ -148,26 +149,25 @@ def run(self, skiptime=0, uncorr=None, threshold=50, methods=None, uncorr : str The observable to be used for the autocorrelation analysis; 'dhdl' (obtained as a sum over those energy components that are changing). - Default: `dhdl` + Specify as `None` will not uncorrelate the data. Default: `dhdl`. threshold : int Proceed with correlated samples if the number of uncorrelated samples is found to be less than this number. If 0 is given, the time series analysis will not be performed at all. Default: 50. methods : str - A list of the methods to esitimate the free energy with. Default: None. - + A list of the methods to esitimate the free energy with. Default: + `('mbar', 'bar', 'ti')`. overlap : str - The filename for the plot of overlap matrix. Default: None. (not - plotting). + The filename for the plot of overlap matrix. Default: `O_MBAR.pdf`. breakdown : bool Plot the free energy differences evaluated for each pair of adjacent states for all methods, including the dH/dlambda curve for TI. Default: - None. (not plotting). + True. forwrev : int Plot the free energy change as a function of time in both directions, with the specified number of points in the time plot. 
The number of time - points (an integer) must be provided. Default: None. (not doing - convergence analysis). + points (an integer) must be provided. Specify as `None` will not do + the convergence analysis. Default: 10. Attributes ---------- diff --git a/src/alchemlyb/workflows/base.py b/src/alchemlyb/workflows/base.py index e3d76f07..abdd51f3 100644 --- a/src/alchemlyb/workflows/base.py +++ b/src/alchemlyb/workflows/base.py @@ -37,7 +37,7 @@ class WorkflowBase(): """ def __init__(self, units='kT', software='Gromacs', T=298, out='./', *args, **kwargs): - + self.T = T self.software = software self.unit = units From 5ee44a80ea668a5763fe147f2870a4c847611c8a Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 15 May 2022 10:44:00 +0100 Subject: [PATCH 082/123] update --- CHANGES | 1 + docs/api_principles.rst | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 5ee26a9f..ef05cdd1 100644 --- a/CHANGES +++ b/CHANGES @@ -24,6 +24,7 @@ Changes Enhancements - Add a base class for workflows (PR #188). + - Add the ABFE workflow (PR #114). - Add filter function to gmx.extract to make it more robust (PR #183): can filter incomplete/corrupted lines (#126, #171) with filter=True. diff --git a/docs/api_principles.rst b/docs/api_principles.rst index 7abbe7f7..7584128e 100644 --- a/docs/api_principles.rst +++ b/docs/api_principles.rst @@ -65,7 +65,8 @@ The library is structured as follows, following a similar style to │   ├── ti_dhdl.py │   └── ... └── workflows ### WORK IN PROGRESS - └── ... + │   ├── base.py + │   ├── abfe.py * The :mod:`~alchemlyb.parsing` submodule contains parsers for individual MD engines, since the output files needed to perform alchemical free energy calculations vary widely and are not standardized. Each module at the very least provides an `extract_u_nk` function for extracting reduced potentials (needed for MBAR), as well as an `extract_dHdl` function for extracting derivatives required for thermodynamic integration. 
Other helper functions may be exposed for additional processing, such as generating an XVG file from an EDR file in the case of GROMACS. All `extract\_*` functions take similar arguments (a file path, parameters such as temperature), and produce standard outputs (:class:`pandas.DataFrame` for reduced potentials, :class:`pandas.Series` for derivatives). From db5232ee884119c0edefb908c64ea356f877bfca Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 15 May 2022 10:53:25 +0100 Subject: [PATCH 083/123] Update abfe.py --- src/alchemlyb/workflows/abfe.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 5221f05f..72ac57f4 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -37,7 +37,7 @@ class ABFE(WorkflowBase): Suffix for datafile sets. Default: 'xvg'. T : float Temperature in K. Default: 298. - out : str + outdirectory : str Directory in which the output files produced by this script will be stored. Default: os.path.curdir. @@ -49,9 +49,9 @@ class ABFE(WorkflowBase): The list of filenames sorted by the lambda state. 
''' def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, - prefix='dhdl', suffix='xvg', T=298, out=os.path.curdir): + prefix='dhdl', suffix='xvg', T=298, outdirectory=os.path.curdir): - super().__init__(units, software, T, out) + super().__init__(units, software, T, outdirectory) self.logger = logging.getLogger('alchemlyb.workflows.ABFE') self.logger.info('Initialise Alchemlyb ABFE Workflow') self.logger.info('Alchemlyb Version: {}'.format(__version__)) From 1d2d536d22f8ba1c6c61f4727d825602ab071951 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 15 May 2022 11:11:01 +0100 Subject: [PATCH 084/123] update --- src/alchemlyb/tests/test_workflow_ABFE.py | 12 +-- src/alchemlyb/workflows/abfe.py | 101 ++++++++++------------ 2 files changed, 51 insertions(+), 62 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index e0d352bf..fd67932a 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -16,7 +16,7 @@ def workflow(tmp_path_factory): outdir = tmp_path_factory.mktemp("out") dir = os.path.dirname(load_ABFE()['data']['complex'][0]) workflow = ABFE(units='kcal/mol', software='Gromacs', dir=dir, - prefix='dhdl', suffix='xvg', T=310, out=str(outdir)) + prefix='dhdl', suffix='xvg', T=310, outdirectory=str(outdir)) workflow.run(skiptime=10, uncorr='dhdl', threshold=50, methods=('mbar', 'bar', 'ti'), overlap='O_MBAR.pdf', breakdown=True, forwrev=10) @@ -78,7 +78,7 @@ def workflow(tmp_path_factory): outdir = tmp_path_factory.mktemp("out") dir = os.path.dirname(load_ABFE()['data']['complex'][0]) workflow = ABFE(software='Gromacs', dir=dir, prefix='dhdl', - suffix='xvg', T=310, out=str(outdir)) + suffix='xvg', T=310, outdirectory=str(outdir)) workflow.update_units('kcal/mol') workflow.read() workflow.preprocess(skiptime=10, uncorr='dhdl', threshold=50) @@ -123,7 +123,7 @@ def workflow(tmp_path_factory): dir = os.path.join(dir, '*') workflow = 
ABFE(units='kcal/mol', software='Gromacs', dir=dir, prefix='dhdl', suffix='bz2', T=310, - out=outdir) + outdirectory=outdir) workflow.run(skiptime=0, uncorr='dhdl', threshold=50, methods=('mbar', 'bar', 'ti'), overlap='O_MBAR.pdf', breakdown=True, forwrev=10) @@ -182,7 +182,7 @@ def workflow(tmp_path_factory): load_benzene()['data']['Coulomb'][0])) dir = os.path.join(dir, '*') workflow = ABFE(software='Gromacs', dir=dir, prefix='dhdl', - suffix='bz2', T=310, out=outdir) + suffix='bz2', T=310, outdirectory=outdir) workflow.read() # Block the n_uk workflow.u_nk_list = [] @@ -227,7 +227,7 @@ def workflow(tmp_path_factory): load_benzene()['data']['Coulomb'][0])) dir = os.path.join(dir, '*') workflow = ABFE(software='Gromacs', dir=dir, prefix='dhdl', - suffix='bz2', T=310, out=outdir) + suffix='bz2', T=310, outdirectory=outdir) workflow.read() return workflow @@ -292,7 +292,7 @@ def workflow(tmp_path_factory): os.path.dirname(load_bace_example()['data']['complex']['vdw'][0])) workflow = ABFE(units='kcal/mol', software='Amber', dir=dir, - prefix='ti', suffix='bz2', T=310, out=str(outdir)) + prefix='ti', suffix='bz2', T=310, outdirectory=str(outdir)) workflow.read() workflow.estimate(methods='ti') return workflow diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 72ac57f4..c9ce2b98 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -54,32 +54,31 @@ def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, super().__init__(units, software, T, outdirectory) self.logger = logging.getLogger('alchemlyb.workflows.ABFE') self.logger.info('Initialise Alchemlyb ABFE Workflow') - self.logger.info('Alchemlyb Version: {}'.format(__version__)) - self.logger.info('Set Temperature to {} K.'.format(T)) - self.logger.info('Set Software to {}.'.format(software)) + self.logger.info(f'Alchemlyb Version: f{__version__}') + self.logger.info(f'Set Temperature to {T} K.') + self.logger.info(f'Set Software to 
{software}.') self.update_units(units) - self.logger.info('Finding files with prefix: {}, suffix: {} under ' - 'directory {} produced by {}'.format(prefix, suffix, - dir, software)) + self.logger.info(f'Finding files with prefix: {prefix}, suffix: ' + f'{suffix} under directory {dir} produced by ' + f'{software}') self.file_list = glob(dir + '/**/' + prefix + '*' + suffix, recursive=True) - self.logger.info('Found {} xvg files.'.format(len(self.file_list))) - self.logger.info('Unsorted file list: \n{}'.format('\n'.join( - self.file_list))) + self.logger.info(f'Found {len(self.file_list)} xvg files.') + self.logger.info("Unsorted file list: \n %s", '\n'.join( + self.file_list)) if software.lower() == 'gromacs': - self.logger.info('Using {} parser to read the data.'.format( - software)) + self.logger.info(f'Using {software} parser to read the data.') self._extract_u_nk = gmx.extract_u_nk self._extract_dHdl = gmx.extract_dHdl elif software.lower() == 'amber': self._extract_u_nk = amber.extract_u_nk self._extract_dHdl = amber.extract_dHdl else: # pragma: no cover - raise NameError('{} parser not found.'.format(software)) + raise NameError(f'{software} parser not found.') def read(self): '''Read the u_nk and dHdL data from the @@ -98,20 +97,20 @@ def read(self): try: u_nk = self._extract_u_nk(xvg, T=self.T) self.logger.info( - 'Reading {} lines of u_nk from {}'.format(len(u_nk), xvg)) + f'Reading {len(u_nk)} lines of u_nk from {xvg}') u_nk_list.append(u_nk) except: # pragma: no cover self.logger.warning( - 'Error reading read u_nk from {}.'.format(xvg)) + f'Error reading read u_nk from {xvg}.') try: dhdl = self._extract_dHdl(xvg, T=self.T) self.logger.info( - 'Reading {} lines of dhdl from {}'.format(len(dhdl), xvg)) + f'Reading {len(dhdl)} lines of dhdl from {xvg}') dHdl_list.append(dhdl) except: # pragma: no cover self.logger.warning( - 'Error reading read dhdl from {}.'.format(xvg)) + f'Error reading read dhdl from {xvg}.') # Sort the files according to the state if 
len(u_nk_list) > 0: @@ -129,8 +128,7 @@ def read(self): dHdl_list[x].reset_index('time').index.values[0])) self.file_list = [self.file_list[i] for i in index_list] - self.logger.info('Sorted file list: \n{}'.format('\n'.join( - self.file_list))) + self.logger.info("Sorted file list: \n %s", '\n'.join(self.file_list)) self.u_nk_list = [u_nk_list[i] for i in index_list] self.dHdl_list = [dHdl_list[i] for i in index_list] self.u_nk_sample_list = None @@ -215,7 +213,7 @@ def update_units(self, units): ''' if units is not None: - self.logger.info('Set unit to {}.'.format(units)) + self.logger.info(f'Set unit to {units}.') self.units = units else: # pragma: no cover pass @@ -245,13 +243,12 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): dHdl_sample_list : list The list of dHdl after decorrelation. ''' - self.logger.info('Start preprocessing with skiptime of {} ' - 'uncorrelation method of {} and ' - 'threshold of {}'.format(skiptime, uncorr, threshold)) + self.logger.info(f'Start preprocessing with skiptime of {skiptime} ' + f'uncorrelation method of {uncorr} and threshold of ' + f'{threshold}') if len(self.u_nk_list) > 0: self.logger.info( - 'Processing the u_nk data set with skiptime of {}.'.format( - skiptime)) + f'Processing the u_nk data set with skiptime of {skiptime}.') self.u_nk_sample_list = [] for index, u_nk in enumerate(self.u_nk_list): @@ -261,14 +258,14 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): subsample = decorrelate_u_nk(u_nk, uncorr) if len(subsample) < threshold: - self.logger.warning('Number of u_nk {} for state {} is ' - 'less than the threshold {}.'.format( - len(subsample), index, threshold)) - self.logger.info('Take all the u_nk for state {}.'.format(index)) + self.logger.warning(f'Number of u_nk {len(subsample)} ' + f'for state {index} is less than the ' + f'threshold {threshold}.') + self.logger.info(f'Take all the u_nk for state {index}.') self.u_nk_sample_list.append(u_nk) else: - self.logger.info('Take 
{} uncorrelated u_nk for state ' - '{}.'.format(len(subsample), index)) + self.logger.info(f'Take {len(subsample)} uncorrelated ' + f'u_nk for state {index}.') self.u_nk_sample_list.append(subsample) else: # pragma: no cover self.logger.info('No u_nk data being subsampled') @@ -279,14 +276,14 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): dHdl = dHdl[dHdl.index.get_level_values('time') >= skiptime] subsample = decorrelate_dhdl(dHdl) if len(subsample) < threshold: - self.logger.warning('Number of dHdl {} for state {} is ' - 'less than the threshold {}.'.format( - len(subsample), index, threshold)) - self.logger.info('Take all the dHdl for state {}.'.format(index)) + self.logger.warning(f'Number of dHdl {len(subsample)} for ' + f'state {index} is less than the ' + f'threshold {threshold}.') + self.logger.info(f'Take all the dHdl for state {index}.') self.dHdl_sample_list.append(dHdl) else: - self.logger.info('Take {} uncorrelated dHdl for state ' - '{}.'.format(len(subsample), index)) + self.logger.info(f'Take {len(subsample)} uncorrelated ' + f'dHdl for state {index}.') self.dHdl_sample_list.append(subsample) else: # pragma: no cover self.logger.info('No dHdl data being subsampled') @@ -311,7 +308,7 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): methods = (methods, ) self.logger.info( - 'Start running estimator: {}.'.format(','.join(methods))) + f"Start running estimator: {','.join(methods)}.") self.estimator = {} # Use unprocessed data if preprocess is not performed. 
if 'ti' in methods: @@ -321,7 +318,7 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): dHdl = concat(self.dHdl_list) self.logger.warning('dHdl has not been preprocessed.') self.logger.info( - 'A total {} lines of dHdl is used.'.format(len(dHdl))) + f'A total {len(dHdl)} lines of dHdl is used.') if 'bar' in methods or 'mbar' in methods: if self.u_nk_sample_list is not None: @@ -330,7 +327,7 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): u_nk = concat(self.u_nk_list) self.logger.warning('u_nk has not been preprocessed.') self.logger.info( - 'A total {} lines of u_nk is used.'.format(len(u_nk))) + f'A total {len(u_nk)} lines of u_nk is used.') for estimator in methods: if estimator.lower() == 'mbar' and len(u_nk) > 0: @@ -346,7 +343,7 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): self.logger.warning('MBAR or BAR estimator require u_nk') else: # pragma: no cover self.logger.warning( - '{} is not a valid estimator.'.format(estimator)) + f'{estimator} is not a valid estimator.') def generate_result(self): '''Summarise the result into a dataframe. 
@@ -429,8 +426,7 @@ def generate_result(self): col_names = [] for estimator_name, estimator in self.estimator.items(): - self.logger.info('Read the results from estimator {}'.format( - estimator_name)) + self.logger.info(f'Read the results from estimator {estimator_name}') # Do the unit conversion delta_f_ = estimator.delta_f_ @@ -447,9 +443,7 @@ def generate_result(self): data_dict[estimator_name.upper() + '_Error'].append( d_delta_f_.iloc[index - 1, index]) - self.logger.info('Generate the staged result from estimator {' - '}'.format( - estimator_name)) + self.logger.info(f'Generate the staged result from estimator {estimator_name}') for index, stage in enumerate(stages): if len(stages) == 1: start = 0 @@ -471,8 +465,7 @@ def generate_result(self): start = num_states - start - 1 end = states.index(lambda_max) self.logger.info( - 'Stage {} is from state {} to state {}.'.format( - stage, start, end)) + f'Stage {stage} is from state {start} to state {end}.') result = delta_f_.iloc[start, end] if estimator_name != 'bar': error = d_delta_f_.iloc[start, end] @@ -505,7 +498,7 @@ def generate_result(self): converter = get_unit_converter(self.units) summary = converter(summary) self.summary = summary - self.logger.info('Write results:\n{}'.format(summary.to_string())) + self.logger.info(f'Write results:\n{summary.to_string()}') return summary def plot_overlap_matrix(self, overlap='O_MBAR.pdf', ax=None): @@ -530,8 +523,7 @@ def plot_overlap_matrix(self, overlap='O_MBAR.pdf', ax=None): ax = plot_mbar_overlap_matrix(self.estimator['mbar'].overlap_matrix, ax=ax) ax.figure.savefig(join(self.out, overlap)) - self.logger.info('Plot overlap matrix to {} under {}.' - ''.format(self.out, overlap)) + self.logger.info(f'Plot overlap matrix to {self.out} under {overlap}.') return ax else: # pragma: no cover self.logger.warning('MBAR estimator not found. 
' @@ -565,8 +557,7 @@ def plot_ti_dhdl(self, dhdl_TI='dhdl_TI.pdf', labels=None, colors=None, ax = plot_ti_dhdl(self.estimator['ti'], units=self.units, labels=labels, colors=colors, ax=ax) ax.figure.savefig(join(self.out, dhdl_TI)) - self.logger.info('Plot TI dHdl to {} under {}.' - ''.format(dhdl_TI, self.out)) + self.logger.info(f'Plot TI dHdl to {dhdl_TI} under {self.out}.') return ax else: raise ValueError('No TI data available in estimators.') @@ -599,8 +590,7 @@ def plot_dF_state(self, dF_state='dF_state.pdf', labels=None, colors=None, units=self.units, orientation=orientation, nb=nb) fig.savefig(join(self.out, dF_state)) - self.logger.info('Plot dF state to {} under {}.' - ''.format(dF_state, self.out)) + self.logger.info(f'Plot dF state to {dF_state} under {self.out}.') return fig def check_convergence(self, forwrev, estimator='autombar', dF_t='dF_t.pdf', @@ -667,8 +657,7 @@ def check_convergence(self, forwrev, estimator='autombar', dF_t='dF_t.pdf', self.convergence = get_unit_converter(self.units)(convergence) - self.logger.info('Plot convergence analysis to {} under {}.' 
- ''.format(dF_t, self.out)) + self.logger.info(f'Plot convergence analysis to {dF_t} under {self.out}.') ax = plot_convergence(self.convergence, units=self.units, ax=ax) From 1d28042b84eda5e6c39b8b503c8e64fac80b152d Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 25 Jun 2022 16:05:25 +0100 Subject: [PATCH 085/123] update --- docs/workflows/alchemlyb.workflows.ABFE.rst | 52 ++++++++++--- src/alchemlyb/tests/test_workflow_ABFE.py | 2 +- src/alchemlyb/workflows/abfe.py | 86 ++++++++++++--------- 3 files changed, 92 insertions(+), 48 deletions(-) diff --git a/docs/workflows/alchemlyb.workflows.ABFE.rst b/docs/workflows/alchemlyb.workflows.ABFE.rst index 47f125fa..e11d5095 100644 --- a/docs/workflows/alchemlyb.workflows.ABFE.rst +++ b/docs/workflows/alchemlyb.workflows.ABFE.rst @@ -9,7 +9,8 @@ Fully Automatic analysis ------------------------ A interface similar to `Alchemical Analysis `_ -could be excuted with two lines of command. :: +could be excuted using :mod:`~alchemlyb.workflows.ABFE` with two lines of +command. :: >>> import os >>> from alchemtest.gmx import load_ABFE @@ -19,12 +20,40 @@ could be excuted with two lines of command. :: >>> print(dir) 'alchemtest/gmx/ABFE/complex' >>> workflow = ABFE(units='kcal/mol', software='Gromacs', dir=dir, - >>> prefix='dhdl', suffix='xvg', T=298, out='./') + >>> prefix='dhdl', suffix='xvg', T=298, outdirectory='./') >>> workflow.run(skiptime=10, uncorr='dhdl', threshold=50, >>> methods=('mbar', 'bar', 'ti'), overlap='O_MBAR.pdf', >>> breakdown=True, forwrev=10) -This would give the free energy estimate using all of + +File Input +^^^^^^^^^^ + +This command expects the energy files to be structured in two ways. It could +either be :: + simulation + ├── lambda_0 + │   ├── prod.xvg + │   └── ... + ├── lambda_1 + │   ├── prod.xvg + │   └── ... + └── ... + +Where :code:`dir='simulation/lambda_*', prefix='prod', suffix='xvg'`. +Or :: + + dhdl_files + ├── dhdl_0.xvg + ├── dhdl_1.xvg + └── ... 
+ +Where :code:`dir='dhdl_files', prefix='dhdl_', suffix='xvg'`. + +output +^^^^^^ + +The command would give the free energy estimate using all of :class:`~alchemlyb.estimators.TI`, :class:`~alchemlyb.estimators.BAR`, :class:`~alchemlyb.estimators.MBAR` and the result will be stored in :attr:`~alchemlyb.workflows.ABFE.summary` as :class:`pandas.Dataframe`. :: @@ -43,15 +72,20 @@ This would give the free energy estimate using all of bonded 2.374144 0.014995 2.341631 0.005507 2.363828 0.021078 TOTAL 35.137291 0.103580 34.860619 0.087022 34.924618 0.119206 +Output Files +^^^^^^^^^^^^ + +The output plots will be writen to the folder specified by `outdirectory`. + The :ref:`overlay matrix for the MBAR estimator ` will be -plotted and saved to `O_MBAR.pdf`. +plotted and saved to :file:`O_MBAR.pdf`. -The :ref:`dHdl for TI ` will be plotted to `dhdl_TI.pdf`. +The :ref:`dHdl for TI ` will be plotted to :file:`dhdl_TI.pdf`. -The :ref:`dF states ` will be plotted to `dF_state.pdf` in -portrait model and `dF_state_long.pdf` in landscape model. +The :ref:`dF states ` will be plotted to :file:`dF_state.pdf` in +portrait model and :file:`dF_state_long.pdf` in landscape model. -The forward and backward convergence will be plotted to `dF_t.pdf` using +The forward and backward convergence will be plotted to :file:`dF_t.pdf` using :class:`~alchemlyb.estimators.MBAR` and save in :attr:`~alchemlyb.workflows.ABFE.convergence`. @@ -69,7 +103,7 @@ to the data generated at each stage of the analysis. :: 'alchemtest/gmx/ABFE/complex' >>> # Load the data >>> workflow = ABFE(software='Gromacs', dir=dir, - >>> prefix='dhdl', suffix='xvg', T=298, out='./') + >>> prefix='dhdl', suffix='xvg', T=298, outdirectory='./') >>> # Set the unit. 
>>> workflow.update_units('kcal/mol') >>> # Read the data diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index fd67932a..aa5d3117 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -281,7 +281,7 @@ def test_unprocessed_dhdl(self, workflow, monkeypatch): assert len(workflow.convergence) == 10 class Test_automatic_amber(): - '''Test the full automatic workflow for load_ABFE from alchemtest.gmx for + '''Test the full automatic workflow for load_ABFE from alchemtest.amber for three stage transformation.''' @staticmethod diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index c9ce2b98..89ca75bb 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -20,7 +20,12 @@ class ABFE(WorkflowBase): - '''Alchemical Analysis style automatic workflow. + '''Workflow for absolute and relative binding free energy calculations. + + This workflow provides functionality similar to the ``alchemical-analysis.py`` script. + It loads multiple input files from alchemical free energy calculations and computes the + free energies between different alchemical windows using different estimators. It + produces plots to aid in the assessment of convergence. Parameters ---------- @@ -28,9 +33,12 @@ class ABFE(WorkflowBase): The unit used for printing and plotting results. {'kcal/mol', 'kJ/mol', 'kT'}. Default: 'kT'. software : str - The software used for generating input. {'GROMACS', 'AMBER'} + The software used for generating input (case-insensitive). {'GROMACS', 'AMBER'}. + This option chooses the appropriate parser for the input file. dir : str Directory in which data files are stored. Default: os.path.curdir. + The input files are searched using the pattern of + `dir + '/**/' + prefix + '*' + suffix. prefix : str Prefix for datafile sets. Default: 'dhdl'. 
suffix : str @@ -40,6 +48,8 @@ class ABFE(WorkflowBase): outdirectory : str Directory in which the output files produced by this script will be stored. Default: os.path.curdir. + ignore_warnings : bool + Turn all errors into warnings. Attributes ---------- @@ -49,9 +59,12 @@ class ABFE(WorkflowBase): The list of filenames sorted by the lambda state. ''' def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, - prefix='dhdl', suffix='xvg', T=298, outdirectory=os.path.curdir): + prefix='dhdl', suffix='xvg', T=298, + outdirectory=os.path.curdir, + ignore_warnings=False): super().__init__(units, software, T, outdirectory) + self.ignore_warnings = ignore_warnings self.logger = logging.getLogger('alchemlyb.workflows.ABFE') self.logger.info('Initialise Alchemlyb ABFE Workflow') self.logger.info(f'Alchemlyb Version: f{__version__}') @@ -78,7 +91,7 @@ def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, self._extract_u_nk = amber.extract_u_nk self._extract_dHdl = amber.extract_dHdl else: # pragma: no cover - raise NameError(f'{software} parser not found.') + raise NotImplementedError(f'{software} parser not found.') def read(self): '''Read the u_nk and dHdL data from the @@ -93,39 +106,37 @@ def read(self): ''' u_nk_list = [] dHdl_list = [] - for xvg in self.file_list: + for file in self.file_list: try: - u_nk = self._extract_u_nk(xvg, T=self.T) + u_nk = self._extract_u_nk(file, T=self.T) self.logger.info( - f'Reading {len(u_nk)} lines of u_nk from {xvg}') + f'Reading {len(u_nk)} lines of u_nk from {file}') u_nk_list.append(u_nk) except: # pragma: no cover - self.logger.warning( - f'Error reading read u_nk from {xvg}.') + msg = f'Error reading read u_nk from {file}.' 
+ if self.ignore_warnings: + self.logger.warning(msg) + else: + raise ValueError(msg) try: - dhdl = self._extract_dHdl(xvg, T=self.T) + dhdl = self._extract_dHdl(file, T=self.T) self.logger.info( - f'Reading {len(dhdl)} lines of dhdl from {xvg}') + f'Reading {len(dhdl)} lines of dhdl from {file}') dHdl_list.append(dhdl) except: # pragma: no cover - self.logger.warning( - f'Error reading read dhdl from {xvg}.') + msg = f'Error reading read dhdl from {file}.' + if self.ignore_warnings: + self.logger.warning(msg) + else: + raise ValueError(msg) # Sort the files according to the state - if len(u_nk_list) > 0: - self.logger.info('Sort files according to the u_nk.') - column_names = u_nk_list[0].columns.values.tolist() - index_list = sorted(range(len(self.file_list)), - key=lambda x:column_names.index( - u_nk_list[x].reset_index('time').index.values[0])) - else: - self.logger.info('Sort files according to the dHdl.') - column_names = sorted([dHdl.reset_index('time').index.values[0] - for dHdl in dHdl_list]) - index_list = sorted(range(len(self.file_list)), - key=lambda x:column_names.index( - dHdl_list[x].reset_index('time').index.values[0])) + self.logger.info('Sort files according to the u_nk.') + column_names = u_nk_list[0].columns.values.tolist() + index_list = sorted(range(len(self.file_list)), + key=lambda x:column_names.index( + u_nk_list[x].reset_index('time').index.values[0])) self.file_list = [self.file_list[i] for i in index_list] self.logger.info("Sorted file list: \n %s", '\n'.join(self.file_list)) @@ -147,24 +158,24 @@ def run(self, skiptime=0, uncorr='dhdl', threshold=50, uncorr : str The observable to be used for the autocorrelation analysis; 'dhdl' (obtained as a sum over those energy components that are changing). - Specify as `None` will not uncorrelate the data. Default: `dhdl`. + Specify as `None` will not uncorrelate the data. Default: 'dhdl'. 
threshold : int Proceed with correlated samples if the number of uncorrelated samples is found to be less than this number. If 0 is given, the time series analysis will not be performed at all. Default: 50. - methods : str - A list of the methods to esitimate the free energy with. Default: + methods : str or list of str + A list of the methods to estimate the free energy with. Default: `('mbar', 'bar', 'ti')`. overlap : str - The filename for the plot of overlap matrix. Default: `O_MBAR.pdf`. + The filename for the plot of overlap matrix. Default: 'O_MBAR.pdf'. breakdown : bool Plot the free energy differences evaluated for each pair of adjacent states for all methods, including the dH/dlambda curve for TI. Default: - True. + ``True``. forwrev : int Plot the free energy change as a function of time in both directions, with the specified number of points in the time plot. The number of time - points (an integer) must be provided. Specify as `None` will not do + points (an integer) must be provided. Specify as ``None`` will not do the convergence analysis. Default: 10. Attributes @@ -202,19 +213,18 @@ def run(self, skiptime=0, uncorr='dhdl', threshold=50, plt.close(ax.figure) - def update_units(self, units): + def update_units(self, units=None): '''Update the unit. Parameters ---------- - units : str - The unit used for printing and plotting results. {'kcal/mol', - 'kJ/mol', 'kT'} + units : {'kcal/mol', 'kJ/mol', 'kT'} + The unit used for printing and plotting results. ''' if units is not None: self.logger.info(f'Set unit to {units}.') - self.units = units + self.units = units or None else: # pragma: no cover pass @@ -293,7 +303,7 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): Parameters ---------- - methods : str + methods : str or list of str A list of the methods to esitimate the free energy with. Default: ['TI', 'BAR', 'MBAR']. 
From 456288aa6060406ff5e0a2719145a542163f1d88 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 26 Jun 2022 16:16:12 +0100 Subject: [PATCH 086/123] Update abfe.py --- src/alchemlyb/workflows/abfe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 89ca75bb..327f11b4 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -38,7 +38,7 @@ class ABFE(WorkflowBase): dir : str Directory in which data files are stored. Default: os.path.curdir. The input files are searched using the pattern of - `dir + '/**/' + prefix + '*' + suffix. + ``dir + '/**/' + prefix + '*' + suffix``. prefix : str Prefix for datafile sets. Default: 'dhdl'. suffix : str From d859c4e69e9c6472ce6b4f4afe7ed693f3353583 Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Sun, 3 Jul 2022 10:23:50 +0100 Subject: [PATCH 087/123] update --- docs/workflows/alchemlyb.workflows.ABFE.rst | 40 +++++++++++++++------ src/alchemlyb/workflows/abfe.py | 8 ++--- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/docs/workflows/alchemlyb.workflows.ABFE.rst b/docs/workflows/alchemlyb.workflows.ABFE.rst index e11d5095..337a9471 100644 --- a/docs/workflows/alchemlyb.workflows.ABFE.rst +++ b/docs/workflows/alchemlyb.workflows.ABFE.rst @@ -15,6 +15,9 @@ command. :: >>> import os >>> from alchemtest.gmx import load_ABFE >>> from alchemlyb.workflows import ABFE + >>> # Enable the logger + >>> import logging + >>> logging.basicConfig(filename='ABFE.log', level=logging.INFO) >>> # Obtain the path of the data >>> dir = os.path.dirname(load_ABFE()['data']['complex'][0]) >>> print(dir) @@ -26,11 +29,15 @@ command. :: >>> breakdown=True, forwrev=10) +See :mod:`~alchemlyb.workflows.ABFE` for the explanation with regard to the +parameters. The next two sections explains the output of the workflow and a +set of analysis that allows the user to examine the quality of the estimate. 
+ File Input ^^^^^^^^^^ -This command expects the energy files to be structured in two ways. It could -either be :: +This command expects the energy files to be structured in two common ways. It +could either be :: simulation ├── lambda_0 │   ├── prod.xvg @@ -40,8 +47,7 @@ either be :: │   └── ... └── ... -Where :code:`dir='simulation/lambda_*', prefix='prod', suffix='xvg'`. -Or :: +Where :code:`dir='simulation/lambda_*', prefix='prod', suffix='xvg'`. Or :: dhdl_files ├── dhdl_0.xvg @@ -55,8 +61,14 @@ output The command would give the free energy estimate using all of :class:`~alchemlyb.estimators.TI`, :class:`~alchemlyb.estimators.BAR`, -:class:`~alchemlyb.estimators.MBAR` and the result will be stored in -:attr:`~alchemlyb.workflows.ABFE.summary` as :class:`pandas.Dataframe`. :: +:class:`~alchemlyb.estimators.MBAR`. For ABFE calculations, the alchemical +transformation is usually done is three stages, the *bonded*, *coul* and *vdw* +which corresponds to the free energy contribution from applying the +restraint to restraint the ligand to the protein, decouple/annihilate the +coulombic interaction between the ligand and the protein and +decouple/annihilate the protein-ligand lennard jones interactions. The result +will be stored in :attr:`~alchemlyb.workflows.ABFE.summary` as +:class:`pandas.Dataframe`. :: MBAR MBAR_Error BAR BAR_Error TI TI_Error @@ -75,19 +87,25 @@ The command would give the free energy estimate using all of Output Files ^^^^^^^^^^^^ -The output plots will be writen to the folder specified by `outdirectory`. +For quality assessment, a couple of plots were generated and writen to +the folder specified by `outdirectory`. The :ref:`overlay matrix for the MBAR estimator ` will be -plotted and saved to :file:`O_MBAR.pdf`. +plotted and saved to :file:`O_MBAR.pdf`, which examines the overlap between +different lambda windows. -The :ref:`dHdl for TI ` will be plotted to :file:`dhdl_TI.pdf`. 
+The :ref:`dHdl for TI ` will be plotted to +:file:`dhdl_TI.pdf`, allows one to examine if the lambda scheduling has +covered the change of the gradient in the lambda space. The :ref:`dF states ` will be plotted to :file:`dF_state.pdf` in -portrait model and :file:`dF_state_long.pdf` in landscape model. +portrait model and :file:`dF_state_long.pdf` in landscape model, which +allows the user to example the contributions from each lambda window. The forward and backward convergence will be plotted to :file:`dF_t.pdf` using :class:`~alchemlyb.estimators.MBAR` and save in -:attr:`~alchemlyb.workflows.ABFE.convergence`. +:attr:`~alchemlyb.workflows.ABFE.convergence`, which allows the user to +examine if the simulation time is enough to achieve a converged result. Semi-automatic analysis ----------------------- diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 327f11b4..5079aa02 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -153,8 +153,8 @@ def run(self, skiptime=0, uncorr='dhdl', threshold=50, Parameters ---------- skiptime : float - Discard data prior to this specified time as 'equilibration' data. Units - picoseconds. Default: 0. + Discard data prior to this specified time as 'equilibration' data. + Units are specified by the corresponding MD Engine. Default: 0. uncorr : str The observable to be used for the autocorrelation analysis; 'dhdl' (obtained as a sum over those energy components that are changing). @@ -236,11 +236,11 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): ---------- skiptime : float Discard data prior to this specified time as 'equilibration' data. - Units picoseconds. Default: 0. + Units are specified by the corresponding MD Engine. Default: 0. uncorr : str The observable to be used for the autocorrelation analysis; 'dhdl' (obtained as a sum over those energy components that are changing). 
- Default: `dhdl` + Default: 'dhdl' threshold : int Proceed with correlated samples if the number of uncorrelated samples is found to be less than this number. If 0 is given, the From 03aad4704070be3febc083578a7ccb8ad1a3e155 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 3 Jul 2022 10:24:47 +0100 Subject: [PATCH 088/123] Update docs/workflows/alchemlyb.workflows.ABFE.rst Co-authored-by: Oliver Beckstein --- docs/workflows/alchemlyb.workflows.ABFE.rst | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/docs/workflows/alchemlyb.workflows.ABFE.rst b/docs/workflows/alchemlyb.workflows.ABFE.rst index 337a9471..93d58b7f 100644 --- a/docs/workflows/alchemlyb.workflows.ABFE.rst +++ b/docs/workflows/alchemlyb.workflows.ABFE.rst @@ -7,10 +7,19 @@ step-by-step version that allows more flexibility. Fully Automatic analysis ------------------------ -A interface similar to -`Alchemical Analysis `_ -could be excuted using :mod:`~alchemlyb.workflows.ABFE` with two lines of -command. :: +*Absolute binding free energy* (ABFE) calculations can be analyzed with +two lines of code in a fully automated manner (similar to +`Alchemical Analysis `_). +In this case, any parameters are set when invoking :class:`~alchemlyb.workflows.abfe.ABFE` +and reasonable defaults are chosen for any parameters not set explicitly. The two steps +are to + +1. initialize an instance of the :class:`~alchemlyb.workflows.abfe.ABFE` class +2. invoke the :meth:`~alchemlyb.workflows.abfe.ABFE.run` method to execute + complete workflow. 
+ +For a GROMACS ABFE simulation, executing the workflow would look similar +to the following code:: >>> import os >>> from alchemtest.gmx import load_ABFE From 995bba1f7875663f3261237a541013497c0f5e37 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 3 Jul 2022 10:25:16 +0100 Subject: [PATCH 089/123] Update docs/workflows/alchemlyb.workflows.ABFE.rst Co-authored-by: Oliver Beckstein --- docs/workflows/alchemlyb.workflows.ABFE.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/workflows/alchemlyb.workflows.ABFE.rst b/docs/workflows/alchemlyb.workflows.ABFE.rst index 93d58b7f..88054a7f 100644 --- a/docs/workflows/alchemlyb.workflows.ABFE.rst +++ b/docs/workflows/alchemlyb.workflows.ABFE.rst @@ -68,7 +68,7 @@ Where :code:`dir='dhdl_files', prefix='dhdl_', suffix='xvg'`. output ^^^^^^ -The command would give the free energy estimate using all of +The workflow returns the free energy estimate using all of :class:`~alchemlyb.estimators.TI`, :class:`~alchemlyb.estimators.BAR`, :class:`~alchemlyb.estimators.MBAR`. For ABFE calculations, the alchemical transformation is usually done is three stages, the *bonded*, *coul* and *vdw* From 5e3d6c4a53fd26a849f1457658f67abd3aa4aa98 Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Sun, 3 Jul 2022 11:06:51 +0100 Subject: [PATCH 090/123] update --- docs/workflows/alchemlyb.workflows.ABFE.rst | 10 ++++---- src/alchemlyb/estimators/__init__.py | 3 +++ src/alchemlyb/tests/test_workflow_ABFE.py | 4 +++ src/alchemlyb/workflows/abfe.py | 27 ++++++++++++++------- 4 files changed, 30 insertions(+), 14 deletions(-) diff --git a/docs/workflows/alchemlyb.workflows.ABFE.rst b/docs/workflows/alchemlyb.workflows.ABFE.rst index 88054a7f..b3b9b806 100644 --- a/docs/workflows/alchemlyb.workflows.ABFE.rst +++ b/docs/workflows/alchemlyb.workflows.ABFE.rst @@ -15,19 +15,19 @@ and reasonable defaults are chosen for any parameters not set explicitly. The tw are to 1. 
initialize an instance of the :class:`~alchemlyb.workflows.abfe.ABFE` class -2. invoke the :meth:`~alchemlyb.workflows.abfe.ABFE.run` method to execute +2. invoke the :meth:`~alchemlyb.workflows.ABFE.run` method to execute complete workflow. For a GROMACS ABFE simulation, executing the workflow would look similar to the following code:: - >>> import os >>> from alchemtest.gmx import load_ABFE >>> from alchemlyb.workflows import ABFE >>> # Enable the logger >>> import logging >>> logging.basicConfig(filename='ABFE.log', level=logging.INFO) >>> # Obtain the path of the data + >>> import os >>> dir = os.path.dirname(load_ABFE()['data']['complex'][0]) >>> print(dir) 'alchemtest/gmx/ABFE/complex' @@ -65,7 +65,7 @@ Where :code:`dir='simulation/lambda_*', prefix='prod', suffix='xvg'`. Or :: Where :code:`dir='dhdl_files', prefix='dhdl_', suffix='xvg'`. -output +Output ^^^^^^ The workflow returns the free energy estimate using all of @@ -96,7 +96,7 @@ will be stored in :attr:`~alchemlyb.workflows.ABFE.summary` as Output Files ^^^^^^^^^^^^ -For quality assessment, a couple of plots were generated and writen to +For quality assessment, a couple of plots were generated and written to the folder specified by `outdirectory`. The :ref:`overlay matrix for the MBAR estimator ` will be @@ -121,10 +121,10 @@ Semi-automatic analysis The same analysis could also performed in steps allowing access and modification to the data generated at each stage of the analysis. 
:: - >>> import os >>> from alchemtest.gmx import load_ABFE >>> from alchemlyb.workflows import ABFE >>> # Obtain the path of the data + >>> import os >>> dir = os.path.dirname(load_ABFE()['data']['complex'][0]) >>> print(dir) 'alchemtest/gmx/ABFE/complex' diff --git a/src/alchemlyb/estimators/__init__.py b/src/alchemlyb/estimators/__init__.py index c05fb93c..00fc6a39 100644 --- a/src/alchemlyb/estimators/__init__.py +++ b/src/alchemlyb/estimators/__init__.py @@ -1,3 +1,6 @@ from .mbar_ import MBAR, AutoMBAR from .bar_ import BAR from .ti_ import TI + +FEP_ESTIMATORS = [MBAR.__name__, AutoMBAR.__name__, BAR.__name__] +TI_ESTIMATORS = [TI.__name__] \ No newline at end of file diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index aa5d3117..60a4d054 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -263,6 +263,10 @@ def test_bar_convergence(self, workflow): workflow.check_convergence(10, estimator='bar') assert len(workflow.convergence) == 10 + def test_convergence_invalid_estimator(self, workflow): + with pytest.raises(ValueError): + workflow.check_convergence(10, estimator='aaa') + def test_ti_convergence(self, workflow): workflow.check_convergence(10, estimator='ti') assert len(workflow.convergence) == 10 diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 5079aa02..a2033b87 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -9,7 +9,7 @@ from .base import WorkflowBase from ..parsing import gmx, amber from ..preprocessing.subsampling import decorrelate_dhdl, decorrelate_u_nk -from ..estimators import BAR, TI +from ..estimators import BAR, TI, FEP_ESTIMATORS, TI_ESTIMATORS from ..estimators import AutoMBAR as MBAR from ..visualisation import (plot_mbar_overlap_matrix, plot_ti_dhdl, plot_dF_state, plot_convergence) @@ -183,7 +183,9 @@ def run(self, skiptime=0, uncorr='dhdl', threshold=50, summary 
: Dataframe The summary of the free energy estimate. convergence : DataFrame - The summary of the convergence results. + The summary of the convergence results. See + :func:`~alchemlyb.convergence.forward_backward_convergence` for + further explanation. ''' self.read() @@ -520,8 +522,8 @@ def plot_overlap_matrix(self, overlap='O_MBAR.pdf', ax=None): overlap : str The filename for the plot of overlap matrix. Default: 'O_MBAR.pdf' ax : matplotlib.axes.Axes - Matplotlib axes object where the plot will be drawn on. If ax=None, - a new axes will be generated. + Matplotlib axes object where the plot will be drawn on. If + ``ax=None``, a new axes will be generated. Returns ------- @@ -607,7 +609,7 @@ def check_convergence(self, forwrev, estimator='autombar', dF_t='dF_t.pdf', ax=None): '''Compute the forward and backward convergence using :func:`~alchemlyb.convergence.forward_backward_convergence`and - plotted with + plot with :func:`~alchemlyb.visualisation.plot_convergence`. Parameters @@ -616,7 +618,7 @@ def check_convergence(self, forwrev, estimator='autombar', dF_t='dF_t.pdf', Plot the free energy change as a function of time in both directions, with the specified number of points in the time plot. The number of time points (an integer) must be provided. - estimator : str + estimator : {'TI', 'BAR', 'MBAR', 'AutoMBAR'} The estimator used for convergence analysis. Default: 'autombar' dF_t : str The filename for the plot of convergence. Default: 'dF_t.pdf' @@ -634,9 +636,9 @@ def check_convergence(self, forwrev, estimator='autombar', dF_t='dF_t.pdf', An axes with the convergence drawn. 
''' self.logger.info('Start convergence analysis.') - self.logger.info('Check data availability.') + self.logger.info('Checking data availability.') - if estimator.lower() in ['mbar', 'bar', 'autombar']: + if estimator.lower() in [x.lower() for x in FEP_ESTIMATORS]: if self.u_nk_sample_list is not None: u_nk_list = self.u_nk_sample_list self.logger.info('Subsampled u_nk is available.') @@ -650,7 +652,9 @@ def check_convergence(self, forwrev, estimator='autombar', dF_t='dF_t.pdf', convergence = forward_backward_convergence(u_nk_list, estimator=estimator, num=forwrev) - else: + elif estimator.lower() in [x.lower() for x in TI_ESTIMATORS]: + self.logger.warning('No valid FEP estimator or dataset found. ' + 'Fallback to TI.') if self.dHdl_sample_list is not None: dHdl_list = self.dHdl_sample_list self.logger.info('Subsampled dHdl is available.') @@ -664,6 +668,11 @@ def check_convergence(self, forwrev, estimator='autombar', dF_t='dF_t.pdf', convergence = forward_backward_convergence(dHdl_list, estimator=estimator, num=forwrev) + else: + msg = f"Estimator {estimator} is not supported. Choose one from " \ + f"{FEP_ESTIMATORS+TI_ESTIMATORS}." 
+ self.logger.error(msg) + raise ValueError(msg) self.convergence = get_unit_converter(self.units)(convergence) From 2ccef0c44f525940a6b55c290afdd29535a26604 Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Sun, 14 Aug 2022 09:24:36 +0100 Subject: [PATCH 091/123] resolve comments --- docs/workflows/alchemlyb.workflows.ABFE.rst | 2 +- src/alchemlyb/convergence/convergence.py | 18 +++--- src/alchemlyb/workflows/abfe.py | 61 ++++++++++++--------- 3 files changed, 44 insertions(+), 37 deletions(-) diff --git a/docs/workflows/alchemlyb.workflows.ABFE.rst b/docs/workflows/alchemlyb.workflows.ABFE.rst index b3b9b806..4ab1a86a 100644 --- a/docs/workflows/alchemlyb.workflows.ABFE.rst +++ b/docs/workflows/alchemlyb.workflows.ABFE.rst @@ -73,7 +73,7 @@ The workflow returns the free energy estimate using all of :class:`~alchemlyb.estimators.MBAR`. For ABFE calculations, the alchemical transformation is usually done is three stages, the *bonded*, *coul* and *vdw* which corresponds to the free energy contribution from applying the -restraint to restraint the ligand to the protein, decouple/annihilate the +restraint to restrain the ligand to the protein, decouple/annihilate the coulombic interaction between the ligand and the protein and decouple/annihilate the protein-ligand lennard jones interactions. The result will be stored in :attr:`~alchemlyb.workflows.ABFE.summary` as diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index 17ce716e..755819fc 100644 --- a/src/alchemlyb/convergence/convergence.py +++ b/src/alchemlyb/convergence/convergence.py @@ -2,11 +2,12 @@ import logging import numpy as np -from ..estimators import MBAR, BAR, TI, AutoMBAR +from ..estimators import BAR, TI +from ..estimators import AutoMBAR as MBAR from .. 
import concat -def forward_backward_convergence(df_list, estimator='mbar', num=10): +def forward_backward_convergence(df_list, estimator='MBAR', num=10): '''Forward and backward convergence of the free energy estimate. Generate the free energy estimate as a function of time in both directions, @@ -20,7 +21,7 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): ---------- df_list : list List of DataFrame of either dHdl or u_nk. - estimator : {'mbar', 'bar', 'ti', 'autombar'} + estimator : {'MBAR', 'BAR', 'TI'} Name of the estimators. num : int The number of time points. @@ -51,16 +52,13 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): logger.info('Start convergence analysis.') logger.info('Check data availability.') - if estimator.lower() == 'mbar': - logger.info('Use MBAR estimator for convergence analysis.') - estimator_fit = MBAR().fit - elif estimator.lower() == 'autombar': + if estimator == 'MBAR': logger.info('Use AutoMBAR estimator for convergence analysis.') - estimator_fit = AutoMBAR().fit - elif estimator.lower() == 'bar': + estimator_fit = MBAR().fit + elif estimator == 'BAR': logger.info('Use BAR estimator for convergence analysis.') estimator_fit = BAR().fit - elif estimator.lower() == 'ti': + elif estimator == 'TI': logger.info('Use TI estimator for convergence analysis.') estimator_fit = TI().fit else: diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index a2033b87..8fc6dcd4 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -115,8 +115,10 @@ def read(self): except: # pragma: no cover msg = f'Error reading read u_nk from {file}.' if self.ignore_warnings: - self.logger.warning(msg) + self.logger.exception(msg + + 'This exception is being ignored because ignore_warnings=True.') else: + self.logger.error(msg) raise ValueError(msg) try: @@ -127,8 +129,10 @@ def read(self): except: # pragma: no cover msg = f'Error reading read dhdl from {file}.' 
if self.ignore_warnings: - self.logger.warning(msg) + self.logger.exception(msg + + 'This exception is being ignored because ignore_warnings=True.') else: + self.logger.error(msg) raise ValueError(msg) # Sort the files according to the state @@ -300,30 +304,36 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): else: # pragma: no cover self.logger.info('No dHdl data being subsampled') - def estimate(self, methods=('mbar', 'bar', 'ti')): + def estimate(self, methods=('MBAR', 'BAR', 'TI')): '''Estimate the free energy using the selected estimator. Parameters ---------- methods : str or list of str - A list of the methods to esitimate the free energy with. Default: + A list of the methods to estimate the free energy with. Default: ['TI', 'BAR', 'MBAR']. Attributes ---------- estimator : dict - The dictionary of estimators. The key for MBAR is 'mbar', for BAR is - 'bar' and for TI is 'ti'. + The dictionary of estimators. The keys are in ['TI', 'BAR', + 'MBAR']. ''' # Make estimators into a tuple if isinstance(methods, str): methods = (methods, ) + for estimator in methods: + if estimator not in (FEP_ESTIMATORS + TI_ESTIMATORS): + msg = f"Estimator {estimator} is not available in {FEP_ESTIMATORS + TI_ESTIMATORS}." + self.logger.error(msg) + raise ValueError(msg) + self.logger.info( f"Start running estimator: {','.join(methods)}.") self.estimator = {} # Use unprocessed data if preprocess is not performed. 
- if 'ti' in methods: + if 'TI' in methods: if self.dHdl_sample_list is not None: dHdl = concat(self.dHdl_sample_list) else: @@ -332,7 +342,7 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): self.logger.info( f'A total {len(dHdl)} lines of dHdl is used.') - if 'bar' in methods or 'mbar' in methods: + if 'BAR' in methods or 'MBAR' in methods: if self.u_nk_sample_list is not None: u_nk = concat(self.u_nk_sample_list) else: @@ -342,20 +352,19 @@ def estimate(self, methods=('mbar', 'bar', 'ti')): f'A total {len(u_nk)} lines of u_nk is used.') for estimator in methods: - if estimator.lower() == 'mbar' and len(u_nk) > 0: + if estimator == 'MBAR' and len(u_nk) > 0: self.logger.info('Run MBAR estimator.') - self.estimator['mbar'] = MBAR().fit(u_nk) - elif estimator.lower() == 'bar' and len(u_nk) > 0: + self.estimator[estimator] = MBAR().fit(u_nk) + elif estimator == 'BAR' and len(u_nk) > 0: self.logger.info('Run BAR estimator.') - self.estimator['bar'] = BAR().fit(u_nk) - elif estimator.lower() == 'ti' and len(dHdl) > 0: + self.estimator[estimator] = BAR().fit(u_nk) + elif estimator == 'TI' and len(dHdl) > 0: self.logger.info('Run TI estimator.') - self.estimator['ti'] = TI().fit(dHdl) - elif estimator.lower() == 'mbar' or estimator.lower() == 'bar': # pragma: no cover - self.logger.warning('MBAR or BAR estimator require u_nk') - else: # pragma: no cover - self.logger.warning( - f'{estimator} is not a valid estimator.') + self.estimator[estimator] = TI().fit(dHdl) + elif estimator in FEP_ESTIMATORS: # pragma: no cover + self.logger.warning(f'{estimator} estimator require u_nk') + else: + self.logger.warning(f'{estimator} estimator require dHdl') def generate_result(self): '''Summarise the result into a dataframe. 
@@ -410,8 +419,8 @@ def generate_result(self): self.logger.info('Summarise the estimate into a dataframe.') # Make the header name self.logger.info('Generate the row names.') - eitimator_names = list(self.estimator.keys()) - num_states = len(self.estimator[eitimator_names[0]].states_) + estimator_names = list(self.estimator.keys()) + num_states = len(self.estimator[estimator_names[0]].states_) data_dict = {'name': [], 'state': []} for i in range(num_states - 1): @@ -556,7 +565,7 @@ def plot_ti_dhdl(self, dhdl_TI='dhdl_TI.pdf', labels=None, colors=None, list of colors for plotting all the alchemical transformations. Default: ['r', 'g', '#7F38EC', '#9F000F', 'b', 'y'] ax : matplotlib.axes.Axes - Matplotlib axes object where the plot will be drawn on. If ax=None, + Matplotlib axes object where the plot will be drawn on. If ``ax=None``, a new axes will be generated. Returns @@ -618,8 +627,8 @@ def check_convergence(self, forwrev, estimator='autombar', dF_t='dF_t.pdf', Plot the free energy change as a function of time in both directions, with the specified number of points in the time plot. The number of time points (an integer) must be provided. - estimator : {'TI', 'BAR', 'MBAR', 'AutoMBAR'} - The estimator used for convergence analysis. Default: 'autombar' + estimator : {'TI', 'BAR', 'MBAR'} + The estimator used for convergence analysis. Default: 'MBAR' dF_t : str The filename for the plot of convergence. 
Default: 'dF_t.pdf' ax : matplotlib.axes.Axes @@ -638,7 +647,7 @@ def check_convergence(self, forwrev, estimator='autombar', dF_t='dF_t.pdf', self.logger.info('Start convergence analysis.') self.logger.info('Checking data availability.') - if estimator.lower() in [x.lower() for x in FEP_ESTIMATORS]: + if estimator in FEP_ESTIMATORS: if self.u_nk_sample_list is not None: u_nk_list = self.u_nk_sample_list self.logger.info('Subsampled u_nk is available.') @@ -652,7 +661,7 @@ def check_convergence(self, forwrev, estimator='autombar', dF_t='dF_t.pdf', convergence = forward_backward_convergence(u_nk_list, estimator=estimator, num=forwrev) - elif estimator.lower() in [x.lower() for x in TI_ESTIMATORS]: + elif estimator in TI_ESTIMATORS: self.logger.warning('No valid FEP estimator or dataset found. ' 'Fallback to TI.') if self.dHdl_sample_list is not None: From 7aa6ecb2745f567b05c272f6a90a9edae1662057 Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Sun, 14 Aug 2022 09:28:43 +0100 Subject: [PATCH 092/123] fix name --- src/alchemlyb/workflows/abfe.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 8fc6dcd4..7a04a209 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -150,7 +150,7 @@ def read(self): self.dHdl_sample_list = None def run(self, skiptime=0, uncorr='dhdl', threshold=50, - methods=('mbar', 'bar', 'ti'), overlap='O_MBAR.pdf', + methods=('MABR', 'BAR', 'TI'), overlap='O_MBAR.pdf', breakdown=True, forwrev=10, *args, **kwargs): ''' The method for running the automatic analysis. @@ -169,7 +169,7 @@ def run(self, skiptime=0, uncorr='dhdl', threshold=50, analysis will not be performed at all. Default: 50. methods : str or list of str A list of the methods to estimate the free energy with. Default: - `('mbar', 'bar', 'ti')`. + `('MABR', 'BAR', 'TI')`. overlap : str The filename for the plot of overlap matrix. 
Default: 'O_MBAR.pdf'. breakdown : bool @@ -214,7 +214,7 @@ def run(self, skiptime=0, uncorr='dhdl', threshold=50, plt.close(fig) if forwrev is not None: - ax = self.check_convergence(forwrev, estimator='autombar', + ax = self.check_convergence(forwrev, estimator='MBAR', dF_t='dF_t.pdf') plt.close(ax.figure) @@ -488,7 +488,7 @@ def generate_result(self): self.logger.info( f'Stage {stage} is from state {start} to state {end}.') result = delta_f_.iloc[start, end] - if estimator_name != 'bar': + if estimator_name != 'BAR': error = d_delta_f_.iloc[start, end] else: error = np.sqrt(sum( @@ -499,7 +499,7 @@ def generate_result(self): # Total result result = delta_f_.iloc[0, -1] - if estimator_name != 'bar': + if estimator_name != 'BAR': error = d_delta_f_.iloc[0, -1] else: error = np.sqrt(sum( @@ -540,8 +540,8 @@ def plot_overlap_matrix(self, overlap='O_MBAR.pdf', ax=None): An axes with the overlap matrix drawn. ''' self.logger.info('Plot overlap matrix.') - if 'mbar' in self.estimator: - ax = plot_mbar_overlap_matrix(self.estimator['mbar'].overlap_matrix, + if 'MBAR' in self.estimator: + ax = plot_mbar_overlap_matrix(self.estimator['MBAR'].overlap_matrix, ax=ax) ax.figure.savefig(join(self.out, overlap)) self.logger.info(f'Plot overlap matrix to {self.out} under {overlap}.') @@ -574,8 +574,8 @@ def plot_ti_dhdl(self, dhdl_TI='dhdl_TI.pdf', labels=None, colors=None, An axes with the TI dhdl drawn. 
''' self.logger.info('Plot TI dHdl.') - if 'ti' in self.estimator: - ax = plot_ti_dhdl(self.estimator['ti'], units=self.units, + if 'TI' in self.estimator: + ax = plot_ti_dhdl(self.estimator['TI'], units=self.units, labels=labels, colors=colors, ax=ax) ax.figure.savefig(join(self.out, dhdl_TI)) self.logger.info(f'Plot TI dHdl to {dhdl_TI} under {self.out}.') @@ -614,7 +614,7 @@ def plot_dF_state(self, dF_state='dF_state.pdf', labels=None, colors=None, self.logger.info(f'Plot dF state to {dF_state} under {self.out}.') return fig - def check_convergence(self, forwrev, estimator='autombar', dF_t='dF_t.pdf', + def check_convergence(self, forwrev, estimator='MBAR', dF_t='dF_t.pdf', ax=None): '''Compute the forward and backward convergence using :func:`~alchemlyb.convergence.forward_backward_convergence`and From eca06dbd659d0db8677af82753a105a67224cefc Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Sun, 14 Aug 2022 09:40:44 +0100 Subject: [PATCH 093/123] fix test --- .github/workflows/ci.yaml | 2 +- src/alchemlyb/tests/test_workflow_ABFE.py | 44 +++++++++++------------ 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 86b7111a..93dada83 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -26,7 +26,7 @@ jobs: test: runs-on: ${{ matrix.os }} strategy: - fail-fast: false + fail-fast: true matrix: os: ["ubuntu-latest", "macOS-latest", "windows-latest"] python-version: ["3.7", "3.8", "3.9", "3.10"] diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index 60a4d054..f9a18fc1 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -18,7 +18,7 @@ def workflow(tmp_path_factory): workflow = ABFE(units='kcal/mol', software='Gromacs', dir=dir, prefix='dhdl', suffix='xvg', T=310, outdirectory=str(outdir)) workflow.run(skiptime=10, uncorr='dhdl', threshold=50, - methods=('mbar', 
'bar', 'ti'), overlap='O_MBAR.pdf', + methods=('MBAR', 'BAR', 'TI'), overlap='O_MBAR.pdf', breakdown=True, forwrev=10) return workflow @@ -39,9 +39,9 @@ def test_subsample(self, workflow): def test_estimator(self, workflow): '''Test if all three estimators have been used.''' assert len(workflow.estimator) == 3 - assert 'mbar' in workflow.estimator - assert 'ti' in workflow.estimator - assert 'bar' in workflow.estimator + assert 'MBAR' in workflow.estimator + assert 'TI' in workflow.estimator + assert 'BAR' in workflow.estimator def test_summary(self, workflow): '''Test if if the summary is right.''' @@ -82,7 +82,7 @@ def workflow(tmp_path_factory): workflow.update_units('kcal/mol') workflow.read() workflow.preprocess(skiptime=10, uncorr='dhdl', threshold=50) - workflow.estimate(methods=('mbar', 'bar', 'ti')) + workflow.estimate(methods=('MBAR', 'BAR', 'TI')) workflow.plot_overlap_matrix(overlap='O_MBAR.pdf') workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') workflow.plot_dF_state(dF_state='dF_state.pdf') @@ -104,7 +104,7 @@ def test_convergence_nosample_u_nk(self, workflow, monkeypatch): def test_dhdl_TI_noTI(self, workflow, monkeypatch): '''Test to plot the dhdl_TI when ti estimator is not there''' no_TI = workflow.estimator - no_TI.pop('ti') + no_TI.pop('TI') monkeypatch.setattr(workflow, 'estimator', no_TI) with pytest.raises(ValueError): @@ -125,7 +125,7 @@ def workflow(tmp_path_factory): prefix='dhdl', suffix='bz2', T=310, outdirectory=outdir) workflow.run(skiptime=0, uncorr='dhdl', threshold=50, - methods=('mbar', 'bar', 'ti'), overlap='O_MBAR.pdf', + methods=('MBAR', 'BAR', 'TI'), overlap='O_MBAR.pdf', breakdown=True, forwrev=10) return workflow @@ -139,9 +139,9 @@ def test_read(self, workflow): def test_estimator(self, workflow): '''Test if all three estimators have been used.''' assert len(workflow.estimator) == 3 - assert 'mbar' in workflow.estimator - assert 'ti' in workflow.estimator - assert 'bar' in workflow.estimator + assert 'MBAR' in workflow.estimator 
+ assert 'TI' in workflow.estimator + assert 'BAR' in workflow.estimator def test_O_MBAR(self, workflow): '''test if the O_MBAR.pdf has been plotted.''' @@ -192,10 +192,10 @@ def workflow(tmp_path_factory): dHdl.insert(1, 'bound', [1.0, ] * len(dHdl)) dHdl.set_index('bound-lambda', append=True, inplace=True) - workflow.estimate(methods=('ti', )) + workflow.estimate(methods=('TI', )) workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') workflow.plot_dF_state(dF_state='dF_state.pdf') - workflow.check_convergence(10, dF_t='dF_t.pdf', estimator='ti') + workflow.check_convergence(10, dF_t='dF_t.pdf', estimator='TI') return workflow def test_dhdl_TI(self, workflow): @@ -212,7 +212,7 @@ def test_convergence(self, workflow): assert len(workflow.convergence) == 10 def test_single_estimator_ti(self, workflow): - workflow.estimate(methods='ti') + workflow.estimate(methods='TI') summary = workflow.generate_result() assert np.isclose(summary['TI']['Stages']['TOTAL'], 2.946, 0.1) @@ -248,19 +248,19 @@ def test_uncorr_threshold(self, workflow, monkeypatch): assert all([len(dHdl) == 40 for dHdl in workflow.dHdl_sample_list]) def test_single_estimator_mbar(self, workflow): - workflow.estimate(methods='mbar') + workflow.estimate(methods='MBAR') assert len(workflow.estimator) == 1 - assert 'mbar' in workflow.estimator + assert 'MBAR' in workflow.estimator summary = workflow.generate_result() assert np.isclose(summary['MBAR']['Stages']['TOTAL'], 2.946, 0.1) def test_single_estimator_ti(self, workflow): - workflow.estimate(methods='ti') + workflow.estimate(methods='TI') summary = workflow.generate_result() assert np.isclose(summary['TI']['Stages']['TOTAL'], 2.946, 0.1) def test_bar_convergence(self, workflow): - workflow.check_convergence(10, estimator='bar') + workflow.check_convergence(10, estimator='BAR') assert len(workflow.convergence) == 10 def test_convergence_invalid_estimator(self, workflow): @@ -268,7 +268,7 @@ def test_convergence_invalid_estimator(self, workflow): 
workflow.check_convergence(10, estimator='aaa') def test_ti_convergence(self, workflow): - workflow.check_convergence(10, estimator='ti') + workflow.check_convergence(10, estimator='TI') assert len(workflow.convergence) == 10 def test_unprocessed_n_uk(self, workflow, monkeypatch): @@ -276,12 +276,12 @@ def test_unprocessed_n_uk(self, workflow, monkeypatch): None) workflow.estimate() assert len(workflow.estimator) == 3 - assert 'mbar' in workflow.estimator + assert 'MBAR' in workflow.estimator def test_unprocessed_dhdl(self, workflow, monkeypatch): monkeypatch.setattr(workflow, 'dHdl_sample_list', None) - workflow.check_convergence(10, estimator='ti') + workflow.check_convergence(10, estimator='TI') assert len(workflow.convergence) == 10 class Test_automatic_amber(): @@ -296,9 +296,9 @@ def workflow(tmp_path_factory): os.path.dirname(load_bace_example()['data']['complex']['vdw'][0])) workflow = ABFE(units='kcal/mol', software='Amber', dir=dir, - prefix='ti', suffix='bz2', T=310, outdirectory=str(outdir)) + prefix='TI', suffix='bz2', T=310, outdirectory=str(outdir)) workflow.read() - workflow.estimate(methods='ti') + workflow.estimate(methods='TI') return workflow def test_summary(self, workflow): From 720a76b25aa6a321aa2bf2c5061c0f56d2494a20 Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Sun, 14 Aug 2022 10:02:31 +0100 Subject: [PATCH 094/123] fix test --- src/alchemlyb/estimators/__init__.py | 2 +- src/alchemlyb/tests/test_convergence.py | 2 +- src/alchemlyb/workflows/abfe.py | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/alchemlyb/estimators/__init__.py b/src/alchemlyb/estimators/__init__.py index 00fc6a39..ca48015b 100644 --- a/src/alchemlyb/estimators/__init__.py +++ b/src/alchemlyb/estimators/__init__.py @@ -3,4 +3,4 @@ from .ti_ import TI FEP_ESTIMATORS = [MBAR.__name__, AutoMBAR.__name__, BAR.__name__] -TI_ESTIMATORS = [TI.__name__] \ No newline at end of file +TI_ESTIMATORS = [TI.__name__] diff --git 
a/src/alchemlyb/tests/test_convergence.py b/src/alchemlyb/tests/test_convergence.py index 7013d119..07a38193 100644 --- a/src/alchemlyb/tests/test_convergence.py +++ b/src/alchemlyb/tests/test_convergence.py @@ -30,7 +30,7 @@ def test_convergence_mbar(gmx_benzene): def test_convergence_autombar(gmx_benzene): dHdl, u_nk = gmx_benzene - convergence = forward_backward_convergence(u_nk, 'AutoMBAR') + convergence = forward_backward_convergence(u_nk, 'MBAR') assert convergence.shape == (10, 5) assert convergence.iloc[0, 0] == pytest.approx(3.02, 0.01) assert convergence.iloc[0, 2] == pytest.approx(3.06, 0.01) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 7a04a209..9d4ae79d 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -494,8 +494,8 @@ def generate_result(self): error = np.sqrt(sum( [d_delta_f_.iloc[start, start+1]**2 for i in range(start, end + 1)])) - data_dict[estimator_name.upper()].append(result) - data_dict[estimator_name.upper() + '_Error'].append(error) + data_dict[estimator_name].append(result) + data_dict[estimator_name + '_Error'].append(error) # Total result result = delta_f_.iloc[0, -1] @@ -505,8 +505,8 @@ def generate_result(self): error = np.sqrt(sum( [d_delta_f_.iloc[i, i + 1] ** 2 for i in range(num_states - 1)])) - data_dict[estimator_name.upper()].append(result) - data_dict[estimator_name.upper() + '_Error'].append(error) + data_dict[estimator_name].append(result) + data_dict[estimator_name + '_Error'].append(error) summary = pd.DataFrame.from_dict(data_dict) summary = summary.set_index(['state', 'name']) @@ -632,7 +632,7 @@ def check_convergence(self, forwrev, estimator='MBAR', dF_t='dF_t.pdf', dF_t : str The filename for the plot of convergence. Default: 'dF_t.pdf' ax : matplotlib.axes.Axes - Matplotlib axes object where the plot will be drawn on. If ax=None, + Matplotlib axes object where the plot will be drawn on. If ``ax=None``, a new axes will be generated. 
Attributes From 5c829d060c4ea65cf87d1f96551984bbdbafac5d Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Sun, 14 Aug 2022 10:03:05 +0100 Subject: [PATCH 095/123] fix test --- src/alchemlyb/tests/test_workflow_ABFE.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index f9a18fc1..413db22d 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -296,7 +296,8 @@ def workflow(tmp_path_factory): os.path.dirname(load_bace_example()['data']['complex']['vdw'][0])) workflow = ABFE(units='kcal/mol', software='Amber', dir=dir, - prefix='TI', suffix='bz2', T=310, outdirectory=str(outdir)) + prefix='ti', suffix='bz2', T=310, outdirectory=str( + outdir)) workflow.read() workflow.estimate(methods='TI') return workflow From 50f743257eea9b8e4a25c378677b50594c6c933c Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Sun, 14 Aug 2022 10:09:09 +0100 Subject: [PATCH 096/123] fix test --- src/alchemlyb/tests/test_visualisation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alchemlyb/tests/test_visualisation.py b/src/alchemlyb/tests/test_visualisation.py index bcaf4a32..d0e34fb2 100644 --- a/src/alchemlyb/tests/test_visualisation.py +++ b/src/alchemlyb/tests/test_visualisation.py @@ -130,7 +130,7 @@ def test_plot_dF_state(): def test_plot_convergence_dataframe(): bz = load_benzene().data data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] - df = forward_backward_convergence(data_list, 'mbar') + df = forward_backward_convergence(data_list, 'MBAR') ax = plot_convergence(df) assert isinstance(ax, matplotlib.axes.Axes) plt.close(ax.figure) From 2b0941dcca691c343877a27ff6bad25010391e64 Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Sun, 14 Aug 2022 10:57:33 +0100 Subject: [PATCH 097/123] remove no cover --- src/alchemlyb/tests/test_workflow_ABFE.py | 29 ++++++++++++++++ 
src/alchemlyb/workflows/abfe.py | 42 +++++++++++------------ 2 files changed, 49 insertions(+), 22 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index 413db22d..639a0ac4 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -110,6 +110,35 @@ def test_dhdl_TI_noTI(self, workflow, monkeypatch): with pytest.raises(ValueError): workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') + def test_noMBAR_for_plot_overlap_matrix(self, workflow, monkeypatch): + monkeypatch.setattr(workflow, 'estimator', {}) + assert workflow.plot_overlap_matrix() is None + + def test_nou_nk_for_check_convergence(self, workflow, monkeypatch): + monkeypatch.setattr(workflow, 'u_nk_list', []) + monkeypatch.setattr(workflow, 'u_nk_sample_list', []) + with pytest.raises(ValueError): + workflow.check_convergence(10, estimator='MBAR') + + def test_nodHdl_for_check_convergence(self, workflow, monkeypatch): + monkeypatch.setattr(workflow, 'dHdl_list', []) + monkeypatch.setattr(workflow, 'dHdl_sample_list', []) + with pytest.raises(ValueError): + workflow.check_convergence(10, estimator='TI') + + def test_nou_nk_for_estimate(self, workflow, monkeypatch): + monkeypatch.setattr(workflow, 'u_nk_list', []) + monkeypatch.setattr(workflow, 'u_nk_sample_list', []) + with pytest.raises(ValueError): + workflow.estimate(methods='MBAR') + + def test_nodHdl_for_estimate(self, workflow, monkeypatch): + monkeypatch.setattr(workflow, 'dHdl_list', []) + monkeypatch.setattr(workflow, 'dHdl_sample_list', []) + with pytest.raises(ValueError): + workflow.estimate(methods='TI') + + class Test_automatic_benzene(): '''Test the full automatic workflow for load_benzene from alchemtest.gmx for single stage transformation.''' diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 9d4ae79d..3c285411 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -361,10 
+361,7 @@ def estimate(self, methods=('MBAR', 'BAR', 'TI')): elif estimator == 'TI' and len(dHdl) > 0: self.logger.info('Run TI estimator.') self.estimator[estimator] = TI().fit(dHdl) - elif estimator in FEP_ESTIMATORS: # pragma: no cover - self.logger.warning(f'{estimator} estimator require u_nk') - else: - self.logger.warning(f'{estimator} estimator require dHdl') + def generate_result(self): '''Summarise the result into a dataframe. @@ -432,13 +429,10 @@ def generate_result(self): stages = u_nk.reset_index('time').index.names self.logger.info('use the stage name from u_nk') except: - try: - dHdl = self.dHdl_list[0] - stages = dHdl.reset_index('time').index.names - self.logger.info('use the stage name from dHdl') - except: # pragma: no cover - stages = [] - self.logger.warning('No stage name found in dHdl or u_nk') + dHdl = self.dHdl_list[0] + stages = dHdl.reset_index('time').index.names + self.logger.info('use the stage name from dHdl') + for stage in stages: data_dict['name'].append(stage.split('-')[0]) data_dict['state'].append('Stages') @@ -454,14 +448,14 @@ def generate_result(self): d_delta_f_ = estimator.d_delta_f_ # Write the estimator header - col_names.append(estimator_name.upper()) - col_names.append(estimator_name.upper() + '_Error') - data_dict[estimator_name.upper()] = [] - data_dict[estimator_name.upper() + '_Error'] = [] + col_names.append(estimator_name) + col_names.append(estimator_name + '_Error') + data_dict[estimator_name] = [] + data_dict[estimator_name + '_Error'] = [] for index in range(1, num_states): - data_dict[estimator_name.upper()].append( + data_dict[estimator_name].append( delta_f_.iloc[index-1, index]) - data_dict[estimator_name.upper() + '_Error'].append( + data_dict[estimator_name + '_Error'].append( d_delta_f_.iloc[index - 1, index]) self.logger.info(f'Generate the staged result from estimator {estimator_name}') @@ -546,7 +540,7 @@ def plot_overlap_matrix(self, overlap='O_MBAR.pdf', ax=None): ax.figure.savefig(join(self.out, 
overlap)) self.logger.info(f'Plot overlap matrix to {self.out} under {overlap}.') return ax - else: # pragma: no cover + else: self.logger.warning('MBAR estimator not found. ' 'Overlap matrix not plotted.') @@ -656,8 +650,9 @@ def check_convergence(self, forwrev, estimator='MBAR', dF_t='dF_t.pdf', u_nk_list = self.u_nk_list self.logger.info('Subsampled u_nk not available, ' 'use original data instead.') - else: # pragma: no cover - self.logger.warning('u_nk is not available.') + else: + self.logger.error(f'u_nk is needed for the f{estimator} estimator.') + raise ValueError(f'u_nk is needed for the f{estimator} estimator.') convergence = forward_backward_convergence(u_nk_list, estimator=estimator, num=forwrev) @@ -672,8 +667,11 @@ def check_convergence(self, forwrev, estimator='MBAR', dF_t='dF_t.pdf', dHdl_list = self.dHdl_list self.logger.info('Subsampled dHdl not available, ' 'use original data instead.') - else: # pragma: no cover - self.logger.warning('dHdl is not available.') + else: + self.logger.error( + f'dHdl is needed for the f{estimator} estimator.') + raise ValueError( + f'dHdl is needed for the f{estimator} estimator.') convergence = forward_backward_convergence(dHdl_list, estimator=estimator, num=forwrev) From 2d173a8615124b36d1c83f9eb8641c1ef5a5f7ab Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Sun, 14 Aug 2022 11:04:38 +0100 Subject: [PATCH 098/123] bump coverage --- src/alchemlyb/tests/test_workflow_ABFE.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index 639a0ac4..4b0565e1 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -121,20 +121,20 @@ def test_nou_nk_for_check_convergence(self, workflow, monkeypatch): workflow.check_convergence(10, estimator='MBAR') def test_nodHdl_for_check_convergence(self, workflow, monkeypatch): - monkeypatch.setattr(workflow, 
'dHdl_list', []) - monkeypatch.setattr(workflow, 'dHdl_sample_list', []) + monkeypatch.setattr(workflow, 'dHdl_list', None) + monkeypatch.setattr(workflow, 'dHdl_sample_list', None) with pytest.raises(ValueError): workflow.check_convergence(10, estimator='TI') def test_nou_nk_for_estimate(self, workflow, monkeypatch): - monkeypatch.setattr(workflow, 'u_nk_list', []) - monkeypatch.setattr(workflow, 'u_nk_sample_list', []) + monkeypatch.setattr(workflow, 'u_nk_list', None) + monkeypatch.setattr(workflow, 'u_nk_sample_list', None) with pytest.raises(ValueError): workflow.estimate(methods='MBAR') def test_nodHdl_for_estimate(self, workflow, monkeypatch): - monkeypatch.setattr(workflow, 'dHdl_list', []) - monkeypatch.setattr(workflow, 'dHdl_sample_list', []) + monkeypatch.setattr(workflow, 'dHdl_list', None) + monkeypatch.setattr(workflow, 'dHdl_sample_list', None) with pytest.raises(ValueError): workflow.estimate(methods='TI') From b2ed582c51fed26c081f51b570554f28dade8fea Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Sun, 14 Aug 2022 11:12:40 +0100 Subject: [PATCH 099/123] bump coverage --- src/alchemlyb/tests/test_workflow_ABFE.py | 23 ++++++++++++----------- src/alchemlyb/workflows/abfe.py | 12 ++++++------ 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index 4b0565e1..bc6b626f 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -126,17 +126,8 @@ def test_nodHdl_for_check_convergence(self, workflow, monkeypatch): with pytest.raises(ValueError): workflow.check_convergence(10, estimator='TI') - def test_nou_nk_for_estimate(self, workflow, monkeypatch): - monkeypatch.setattr(workflow, 'u_nk_list', None) - monkeypatch.setattr(workflow, 'u_nk_sample_list', None) - with pytest.raises(ValueError): - workflow.estimate(methods='MBAR') - - def test_nodHdl_for_estimate(self, workflow, monkeypatch): - 
monkeypatch.setattr(workflow, 'dHdl_list', None) - monkeypatch.setattr(workflow, 'dHdl_sample_list', None) - with pytest.raises(ValueError): - workflow.estimate(methods='TI') + def test_no_update_units(self, workflow, monkeypatch): + assert workflow.update_units() is None class Test_automatic_benzene(): @@ -276,6 +267,16 @@ def test_uncorr_threshold(self, workflow, monkeypatch): assert all([len(u_nk) == 40 for u_nk in workflow.u_nk_sample_list]) assert all([len(dHdl) == 40 for dHdl in workflow.dHdl_sample_list]) + def test_no_u_nk_preprocess(self, workflow, monkeypatch): + monkeypatch.setattr(workflow, 'u_nk_list', []) + workflow.preprocess(threshold=50) + assert len(workflow.u_nk_list) == 0 + + def test_no_dHdl_preprocess(self, workflow, monkeypatch): + monkeypatch.setattr(workflow, 'dHdl_list', []) + workflow.preprocess(threshold=50) + assert len(workflow.dHdl_list) == 0 + def test_single_estimator_mbar(self, workflow): workflow.estimate(methods='MBAR') assert len(workflow.estimator) == 1 diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 3c285411..d5fede92 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -231,7 +231,7 @@ def update_units(self, units=None): if units is not None: self.logger.info(f'Set unit to {units}.') self.units = units or None - else: # pragma: no cover + else: pass def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): @@ -283,7 +283,7 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): self.logger.info(f'Take {len(subsample)} uncorrelated ' f'u_nk for state {index}.') self.u_nk_sample_list.append(subsample) - else: # pragma: no cover + else: self.logger.info('No u_nk data being subsampled') if len(self.dHdl_list) > 0: @@ -301,7 +301,7 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): self.logger.info(f'Take {len(subsample)} uncorrelated ' f'dHdl for state {index}.') self.dHdl_sample_list.append(subsample) - else: # pragma: no cover + 
else: self.logger.info('No dHdl data being subsampled') def estimate(self, methods=('MBAR', 'BAR', 'TI')): @@ -352,13 +352,13 @@ def estimate(self, methods=('MBAR', 'BAR', 'TI')): f'A total {len(u_nk)} lines of u_nk is used.') for estimator in methods: - if estimator == 'MBAR' and len(u_nk) > 0: + if estimator == 'MBAR': self.logger.info('Run MBAR estimator.') self.estimator[estimator] = MBAR().fit(u_nk) - elif estimator == 'BAR' and len(u_nk) > 0: + elif estimator == 'BAR': self.logger.info('Run BAR estimator.') self.estimator[estimator] = BAR().fit(u_nk) - elif estimator == 'TI' and len(dHdl) > 0: + elif estimator == 'TI': self.logger.info('Run TI estimator.') self.estimator[estimator] = TI().fit(dHdl) From 80b9bb340877787eb51fdcd639987a3034a187a8 Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Sun, 14 Aug 2022 11:22:01 +0100 Subject: [PATCH 100/123] bump coverage --- src/alchemlyb/tests/test_workflow_ABFE.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index bc6b626f..ab73958d 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -114,21 +114,25 @@ def test_noMBAR_for_plot_overlap_matrix(self, workflow, monkeypatch): monkeypatch.setattr(workflow, 'estimator', {}) assert workflow.plot_overlap_matrix() is None - def test_nou_nk_for_check_convergence(self, workflow, monkeypatch): - monkeypatch.setattr(workflow, 'u_nk_list', []) - monkeypatch.setattr(workflow, 'u_nk_sample_list', []) + def test_no_u_nk_for_check_convergence(self, workflow, monkeypatch): + monkeypatch.setattr(workflow, 'u_nk_list', None) + monkeypatch.setattr(workflow, 'u_nk_sample_list', None) with pytest.raises(ValueError): workflow.check_convergence(10, estimator='MBAR') - def test_nodHdl_for_check_convergence(self, workflow, monkeypatch): + def test_no_dHdl_for_check_convergence(self, workflow, monkeypatch): 
monkeypatch.setattr(workflow, 'dHdl_list', None) monkeypatch.setattr(workflow, 'dHdl_sample_list', None) with pytest.raises(ValueError): workflow.check_convergence(10, estimator='TI') - def test_no_update_units(self, workflow, monkeypatch): + def test_no_update_units(self, workflow): assert workflow.update_units() is None + def test_no_name_estimate(self, workflow): + with pytest.raises(ValueError): + workflow.estimate('aaa') + class Test_automatic_benzene(): '''Test the full automatic workflow for load_benzene from alchemtest.gmx for From d5c9ea5659880b10b678324fb40ea56bbbe61472 Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Sun, 14 Aug 2022 11:45:37 +0100 Subject: [PATCH 101/123] remove no cover --- src/alchemlyb/tests/test_workflow_ABFE.py | 5 +++++ src/alchemlyb/workflows/abfe.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index ab73958d..071997a4 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -340,3 +340,8 @@ def test_summary(self, workflow): '''Test if if the summary is right.''' summary = workflow.generate_result() assert np.isclose(summary['TI']['Stages']['TOTAL'], 1.40405980473, 0.1) + +def test_no_parser(): + with pytest.raises(NotImplementedError): + workflow = ABFE(units='kcal/mol', software='aaa', + prefix='ti', suffix='bz2', T=310) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index d5fede92..7a68e568 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -90,7 +90,7 @@ def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, elif software.lower() == 'amber': self._extract_u_nk = amber.extract_u_nk self._extract_dHdl = amber.extract_dHdl - else: # pragma: no cover + else: raise NotImplementedError(f'{software} parser not found.') def read(self): From d061fdc5910335ef033547d07e76ad6d67252386 Mon Sep 
17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 22 Aug 2022 09:13:36 +0100 Subject: [PATCH 102/123] update --- CHANGES | 2 +- docs/api_principles.rst | 4 ++-- docs/workflows.rst | 4 ++-- docs/workflows/alchemlyb.workflows.ABFE.rst | 19 +++++++++++-------- src/alchemlyb/workflows/abfe.py | 2 -- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/CHANGES b/CHANGES index 6fac5708..c91acca7 100644 --- a/CHANGES +++ b/CHANGES @@ -21,6 +21,7 @@ Changes - Drop support for py3.7 (Issue #179, PR #214) Enhancements + - Add a base class for workflows (PR #188). Fixes @@ -39,7 +40,6 @@ Changes - remove broken .zip support from util.anyopen() (PR #197) Enhancements - - Add a base class for workflows (PR #188). - Add the ABFE workflow (PR #114). - Add filter function to gmx.extract to make it more robust (PR #183): can filter incomplete/corrupted lines (#126, #171) with filter=True. diff --git a/docs/api_principles.rst b/docs/api_principles.rst index 7584128e..8fc6b613 100644 --- a/docs/api_principles.rst +++ b/docs/api_principles.rst @@ -75,8 +75,8 @@ The library is structured as follows, following a similar style to * The :mod:`~alchemlyb.convergence` submodule features convenience functions/classes for doing convergence analysis using a given dataset and a chosen estimator. * The :mod:`~alchemlyb.postprocessors` submodule contains functions to calculate new quantities or express data in different units. * The :mod:`~alchemlyb.visualisation` submodule contains convenience plotting functions as known from, for example, `alchemical-analysis.py`_. -* The :mod:`~alchemlyb.workflows` submodule will contain complete analysis workflows that will serve as larger building blocks for complex analysis pipelines or a command line interface. - +* The :mod:`~alchemlyb.workflows` submodule contains complete analysis workflows ... + For example, :mod:`alchemlyb.workflows.abfe` implements a complete absolute binding free energy calculation.". 
All of these components lend themselves well to writing clear and flexible pipelines for processing data needed for alchemical free energy calculations, and furthermore allow for scaling up via libraries like `dask`_ or `joblib`_. diff --git a/docs/workflows.rst b/docs/workflows.rst index c8ecfc78..be747509 100644 --- a/docs/workflows.rst +++ b/docs/workflows.rst @@ -7,10 +7,10 @@ of the results and step-by-step version that allows more flexibility. For developers, the skeleton of the workflow should follow the example in :class:`alchemlyb.workflows.base.WorkflowBase`. -For users, **alchemlyb** offered a workflow :class:`alchemlyb.workflows.ABFE` +For users, **alchemlyb** offers a workflow :class:`alchemlyb.workflows.ABFE` similar to `Alchemical Analysis `_ -for doing automatic ABFE analysis. +for doing automatic absolute binding free energy (ABFE) analysis. .. currentmodule:: alchemlyb.workflows diff --git a/docs/workflows/alchemlyb.workflows.ABFE.rst b/docs/workflows/alchemlyb.workflows.ABFE.rst index 4ab1a86a..23e11e43 100644 --- a/docs/workflows/alchemlyb.workflows.ABFE.rst +++ b/docs/workflows/alchemlyb.workflows.ABFE.rst @@ -1,9 +1,8 @@ The ABFE workflow ================== -Though **alchemlyb** is a library offering great flexibility in deriving free -energy estimate, it also provide a easy pipeline that is similar to -`Alchemical Analysis `_ and a -step-by-step version that allows more flexibility. +The *Absolute binding free energy* (ABFE) workflow provides a complete workflow +that uses the energy files generated by MD engine as input and generates the +binding free energy as well as the analysis plots. Fully Automatic analysis ------------------------ @@ -14,7 +13,7 @@ In this case, any parameters are set when invoking :class:`~alchemlyb.workflows. and reasonable defaults are chosen for any parameters not set explicitly. The two steps are to -1. initialize an instance of the :class:`~alchemlyb.workflows.abfe.ABFE` class +1. 
initialize an instance of the :class:`~alchemlyb.workflows.abfe.ABFE` class 2. invoke the :meth:`~alchemlyb.workflows.ABFE.run` method to execute complete workflow. @@ -38,9 +37,13 @@ to the following code:: >>> breakdown=True, forwrev=10) -See :mod:`~alchemlyb.workflows.ABFE` for the explanation with regard to the -parameters. The next two sections explains the output of the workflow and a -set of analysis that allows the user to examine the quality of the estimate. +The workflow uses the :class:`~alchemlyb.parsing` to parse the data from the +energy files, remove the initial unequilibrated frames and decorrelate the data +with :class:`~alchemlyb.preprocessing.subsampling`. The dataset +:ref:`dHdl ` and :ref:`u_nk ` are then passed to +:class:`~alchemlyb.estimators` for free energy estimation. The workflow will +also perform a set of analysis that allows the user to examine the quality of +the estimation, which is explained in the next two sections. File Input ^^^^^^^^^^ diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 7a68e568..229197c0 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -231,8 +231,6 @@ def update_units(self, units=None): if units is not None: self.logger.info(f'Set unit to {units}.') self.units = units or None - else: - pass def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): '''Preprocess the data by removing the equilibration time and From a778a0ec90b6db7e782fa0f460b92b1bae077882 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 22 Aug 2022 11:53:52 +0100 Subject: [PATCH 103/123] update --- .github/workflows/ci.yaml | 2 +- CHANGES | 2 +- docs/workflows/alchemlyb.workflows.ABFE.rst | 9 ++- src/alchemlyb/convergence/convergence.py | 36 +++++---- src/alchemlyb/estimators/mbar_.py | 45 +++++++---- src/alchemlyb/tests/test_convergence.py | 14 +++- src/alchemlyb/tests/test_workflow_ABFE.py | 43 +++++++---- src/alchemlyb/workflows/abfe.py | 84 +++++++++++++-------- 8 
files changed, 150 insertions(+), 85 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 3b187bb3..b59701f7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -26,7 +26,7 @@ jobs: test: runs-on: ${{ matrix.os }} strategy: - fail-fast: true + fail-fast: false matrix: os: ["ubuntu-latest", "macOS-latest", "windows-latest"] python-version: ["3.8", "3.9", "3.10"] diff --git a/CHANGES b/CHANGES index c91acca7..50e1858d 100644 --- a/CHANGES +++ b/CHANGES @@ -22,6 +22,7 @@ Changes Enhancements - Add a base class for workflows (PR #188). + - Add the ABFE workflow (PR #114). Fixes @@ -40,7 +41,6 @@ Changes - remove broken .zip support from util.anyopen() (PR #197) Enhancements - - Add the ABFE workflow (PR #114). - Add filter function to gmx.extract to make it more robust (PR #183): can filter incomplete/corrupted lines (#126, #171) with filter=True. - Add support to util.anyopen() for taking filelike objects (PR #197) diff --git a/docs/workflows/alchemlyb.workflows.ABFE.rst b/docs/workflows/alchemlyb.workflows.ABFE.rst index 23e11e43..748febf7 100644 --- a/docs/workflows/alchemlyb.workflows.ABFE.rst +++ b/docs/workflows/alchemlyb.workflows.ABFE.rst @@ -18,7 +18,7 @@ are to complete workflow. For a GROMACS ABFE simulation, executing the workflow would look similar -to the following code:: +to the following code (The log is configured by logger) :: >>> from alchemtest.gmx import load_ABFE >>> from alchemlyb.workflows import ABFE @@ -39,17 +39,18 @@ to the following code:: The workflow uses the :class:`~alchemlyb.parsing` to parse the data from the energy files, remove the initial unequilibrated frames and decorrelate the data -with :class:`~alchemlyb.preprocessing.subsampling`. The dataset +with :class:`~alchemlyb.preprocessing.subsampling`. The decorrelated dataset :ref:`dHdl ` and :ref:`u_nk ` are then passed to :class:`~alchemlyb.estimators` for free energy estimation. 
The workflow will also perform a set of analysis that allows the user to examine the quality of -the estimation, which is explained in the next two sections. +the estimation. File Input ^^^^^^^^^^ This command expects the energy files to be structured in two common ways. It could either be :: + simulation ├── lambda_0 │   ├── prod.xvg @@ -115,7 +116,7 @@ portrait model and :file:`dF_state_long.pdf` in landscape model, which allows the user to example the contributions from each lambda window. The forward and backward convergence will be plotted to :file:`dF_t.pdf` using -:class:`~alchemlyb.estimators.MBAR` and save in +:class:`~alchemlyb.estimators.MBAR` and saved in :attr:`~alchemlyb.workflows.ABFE.convergence`, which allows the user to examine if the simulation time is enough to achieve a converged result. diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index 755819fc..9a7c71cd 100644 --- a/src/alchemlyb/convergence/convergence.py +++ b/src/alchemlyb/convergence/convergence.py @@ -1,13 +1,15 @@ -import pandas as pd import logging +from warnings import warn + +import pandas as pd import numpy as np -from ..estimators import BAR, TI +from ..estimators import BAR, TI, FEP_ESTIMATORS, TI_ESTIMATORS from ..estimators import AutoMBAR as MBAR from .. import concat -def forward_backward_convergence(df_list, estimator='MBAR', num=10): +def forward_backward_convergence(df_list, estimator='MBAR', num=10, **kwargs): '''Forward and backward convergence of the free energy estimate. Generate the free energy estimate as a function of time in both directions, @@ -25,6 +27,8 @@ def forward_backward_convergence(df_list, estimator='MBAR', num=10): Name of the estimators. num : int The number of time points. + kwargs : dict + Keyword arguments to be passed to the estimator. Returns ------- @@ -45,25 +49,29 @@ def forward_backward_convergence(df_list, estimator='MBAR', num=10): .. versionadded:: 0.6.0 + .. 
versionchanged:: 1.0.0 + The ``estimator`` accepts uppercase input. ''' logger = logging.getLogger('alchemlyb.convergence.' 'forward_backward_convergence') logger.info('Start convergence analysis.') logger.info('Check data availability.') + if estimator.upper() != estimator: + warn("Using lower-case strings for the 'estimator' kwarg in " + "convergence.forward_backward_convergence() is deprecated in " + "1.0.0 and only upper case will be accepted in 2.0.0", + DeprecationWarning) + estimator = estimator.upper() - if estimator == 'MBAR': - logger.info('Use AutoMBAR estimator for convergence analysis.') - estimator_fit = MBAR().fit - elif estimator == 'BAR': - logger.info('Use BAR estimator for convergence analysis.') - estimator_fit = BAR().fit - elif estimator == 'TI': - logger.info('Use TI estimator for convergence analysis.') - estimator_fit = TI().fit + if estimator not in (FEP_ESTIMATORS + TI_ESTIMATORS): + msg = f"Estimator {estimator} is not available in {FEP_ESTIMATORS + TI_ESTIMATORS}." + logger.error(msg) + raise ValueError(msg) else: - raise ValueError( - '{} is not a valid estimator.'.format(estimator)) + # select estimator class by name + estimator_fit = globals()[estimator](**kwargs).fit + logger.info(f'Use {estimator} estimator for convergence analysis.') logger.info('Begin forward analysis') forward_list = [] diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py index e40925b7..729ced48 100644 --- a/src/alchemlyb/estimators/mbar_.py +++ b/src/alchemlyb/estimators/mbar_.py @@ -152,6 +152,14 @@ class AutoMBAR(MBAR): :class:`AutoMBAR` may be useful in high-throughput calculations where it can avoid failures due non-converged MBAR estimates. + Parameters + ---------- + + method : str, optional, default=None + The optimization routine to use. This parameter is default to None. + When a specific method is set, AutoMBAR will behave in the same ways + as MBAR. 
+ Note ---- All arguments are described under :class:`MBAR` except that the solver method @@ -163,30 +171,35 @@ class AutoMBAR(MBAR): .. versionadded:: 0.6.0 + .. versionchanged:: 1.0.0 + The AutoMBAR accepts method argument. """ def __init__(self, maximum_iterations=10000, relative_tolerance=1.0e-7, - initial_f_k=None, verbose=False): + initial_f_k=None, verbose=False, method=None): super().__init__(maximum_iterations=maximum_iterations, relative_tolerance=relative_tolerance, initial_f_k=initial_f_k, - verbose=verbose, method=None) + verbose=verbose, method=method) self.logger = logging.getLogger('alchemlyb.estimators.AutoMBAR') def _do_MBAR(self, u_nk, N_k, solver_protocol): - self.logger.info('Initialise the automatic routine of the MBAR ' - 'estimator.') - # Try the fastest method first - try: - self.logger.info('Trying the hybr method.') - solver_protocol["method"] = 'hybr' - mbar, out = super()._do_MBAR(u_nk, N_k, solver_protocol) - except pymbar.utils.ParameterError: + if solver_protocol["method"] is None: + self.logger.info('Initialise the automatic routine of the MBAR ' + 'estimator.') + # Try the fastest method first try: - self.logger.info('Trying the adaptive method.') - solver_protocol["method"] = 'adaptive' + self.logger.info('Trying the hybr method.') + solver_protocol["method"] = 'hybr' mbar, out = super()._do_MBAR(u_nk, N_k, solver_protocol) except pymbar.utils.ParameterError: - self.logger.info('Trying the BFGS method.') - solver_protocol["method"] = 'BFGS' - mbar, out = super()._do_MBAR(u_nk, N_k, solver_protocol) - return mbar, out + try: + self.logger.info('Trying the adaptive method.') + solver_protocol["method"] = 'adaptive' + mbar, out = super()._do_MBAR(u_nk, N_k, solver_protocol) + except pymbar.utils.ParameterError: + self.logger.info('Trying the BFGS method.') + solver_protocol["method"] = 'BFGS' + mbar, out = super()._do_MBAR(u_nk, N_k, solver_protocol) + return mbar, out + else: + return super()._do_MBAR(u_nk, N_k, solver_protocol) 
diff --git a/src/alchemlyb/tests/test_convergence.py b/src/alchemlyb/tests/test_convergence.py index 07a38193..2107bc8a 100644 --- a/src/alchemlyb/tests/test_convergence.py +++ b/src/alchemlyb/tests/test_convergence.py @@ -48,5 +48,15 @@ def test_convergence_bar(gmx_benzene): def test_convergence_wrong_estimator(gmx_benzene): dHdl, u_nk = gmx_benzene - with pytest.raises(ValueError, match="{} is not a valid estimator".format("www")): - convergence = forward_backward_convergence(u_nk, 'www') + with pytest.raises(ValueError, match="is not available in"): + forward_backward_convergence(u_nk, 'www') + +def test_convergence_wrong_cases(gmx_benzene): + dHdl, u_nk = gmx_benzene + with pytest.warns(DeprecationWarning, match="Using lower-case strings for"): + forward_backward_convergence(u_nk, 'mbar') + +def test_convergence_method(gmx_benzene): + dHdl, u_nk = gmx_benzene + convergence = forward_backward_convergence(u_nk, 'MBAR', num=2, method='adaptive') + assert len(convergence) == 2 diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index 071997a4..6ce1ad13 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -15,10 +15,10 @@ class Test_automatic_ABFE(): def workflow(tmp_path_factory): outdir = tmp_path_factory.mktemp("out") dir = os.path.dirname(load_ABFE()['data']['complex'][0]) - workflow = ABFE(units='kcal/mol', software='Gromacs', dir=dir, + workflow = ABFE(units='kcal/mol', software='GROMACS', dir=dir, prefix='dhdl', suffix='xvg', T=310, outdirectory=str(outdir)) workflow.run(skiptime=10, uncorr='dhdl', threshold=50, - methods=('MBAR', 'BAR', 'TI'), overlap='O_MBAR.pdf', + estimators=('MBAR', 'BAR', 'TI'), overlap='O_MBAR.pdf', breakdown=True, forwrev=10) return workflow @@ -68,6 +68,19 @@ def test_check_convergence(self, workflow): assert os.path.isfile(os.path.join(workflow.out, 'dF_t.pdf')) assert len(workflow.convergence) == 10 + def test_estimator_method(self, 
workflow, monkeypatch): + '''Test if the method keyword could be passed to the AutoMBAR estimator.''' + monkeypatch.setattr(workflow, 'estimator', + dict()) + workflow.estimate(estimators='MBAR', method='adaptive') + assert 'MBAR' in workflow.estimator + + def test_convergence_method(self, workflow, monkeypatch): + '''Test if the method keyword could be passed to the AutoMBAR estimator from convergence.''' + monkeypatch.setattr(workflow, 'convergence', None) + workflow.check_convergence(2, estimator='MBAR', method='adaptive') + assert len(workflow.convergence) == 2 + class Test_manual_ABFE(Test_automatic_ABFE): '''Test the manual workflow for load_ABFE from alchemtest.gmx for three stage transformation.''' @@ -77,12 +90,12 @@ class Test_manual_ABFE(Test_automatic_ABFE): def workflow(tmp_path_factory): outdir = tmp_path_factory.mktemp("out") dir = os.path.dirname(load_ABFE()['data']['complex'][0]) - workflow = ABFE(software='Gromacs', dir=dir, prefix='dhdl', + workflow = ABFE(software='GROMACS', dir=dir, prefix='dhdl', suffix='xvg', T=310, outdirectory=str(outdir)) workflow.update_units('kcal/mol') workflow.read() workflow.preprocess(skiptime=10, uncorr='dhdl', threshold=50) - workflow.estimate(methods=('MBAR', 'BAR', 'TI')) + workflow.estimate(estimators=('MBAR', 'BAR', 'TI')) workflow.plot_overlap_matrix(overlap='O_MBAR.pdf') workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') workflow.plot_dF_state(dF_state='dF_state.pdf') @@ -145,11 +158,11 @@ def workflow(tmp_path_factory): dir = os.path.dirname(os.path.dirname( load_benzene()['data']['Coulomb'][0])) dir = os.path.join(dir, '*') - workflow = ABFE(units='kcal/mol', software='Gromacs', dir=dir, + workflow = ABFE(units='kcal/mol', software='GROMACS', dir=dir, prefix='dhdl', suffix='bz2', T=310, outdirectory=outdir) workflow.run(skiptime=0, uncorr='dhdl', threshold=50, - methods=('MBAR', 'BAR', 'TI'), overlap='O_MBAR.pdf', + estimators=('MBAR', 'BAR', 'TI'), overlap='O_MBAR.pdf', breakdown=True, forwrev=10) return 
workflow @@ -205,7 +218,7 @@ def workflow(tmp_path_factory): dir = os.path.dirname(os.path.dirname( load_benzene()['data']['Coulomb'][0])) dir = os.path.join(dir, '*') - workflow = ABFE(software='Gromacs', dir=dir, prefix='dhdl', + workflow = ABFE(software='GROMACS', dir=dir, prefix='dhdl', suffix='bz2', T=310, outdirectory=outdir) workflow.read() # Block the n_uk @@ -216,7 +229,7 @@ def workflow(tmp_path_factory): dHdl.insert(1, 'bound', [1.0, ] * len(dHdl)) dHdl.set_index('bound-lambda', append=True, inplace=True) - workflow.estimate(methods=('TI', )) + workflow.estimate(estimators=('TI', )) workflow.plot_ti_dhdl(dhdl_TI='dhdl_TI.pdf') workflow.plot_dF_state(dF_state='dF_state.pdf') workflow.check_convergence(10, dF_t='dF_t.pdf', estimator='TI') @@ -236,7 +249,7 @@ def test_convergence(self, workflow): assert len(workflow.convergence) == 10 def test_single_estimator_ti(self, workflow): - workflow.estimate(methods='TI') + workflow.estimate(estimators='TI') summary = workflow.generate_result() assert np.isclose(summary['TI']['Stages']['TOTAL'], 2.946, 0.1) @@ -250,14 +263,14 @@ def workflow(tmp_path_factory): dir = os.path.dirname(os.path.dirname( load_benzene()['data']['Coulomb'][0])) dir = os.path.join(dir, '*') - workflow = ABFE(software='Gromacs', dir=dir, prefix='dhdl', + workflow = ABFE(software='GROMACS', dir=dir, prefix='dhdl', suffix='bz2', T=310, outdirectory=outdir) workflow.read() return workflow def test_run_none(self, workflow): '''Don't run anything''' - workflow.run(uncorr=None, methods=None, overlap=None, breakdown=None, + workflow.run(uncorr=None, estimators=None, overlap=None, breakdown=None, forwrev=None) def test_uncorr_threshold(self, workflow, monkeypatch): @@ -282,14 +295,14 @@ def test_no_dHdl_preprocess(self, workflow, monkeypatch): assert len(workflow.dHdl_list) == 0 def test_single_estimator_mbar(self, workflow): - workflow.estimate(methods='MBAR') + workflow.estimate(estimators='MBAR') assert len(workflow.estimator) == 1 assert 'MBAR' 
in workflow.estimator summary = workflow.generate_result() assert np.isclose(summary['MBAR']['Stages']['TOTAL'], 2.946, 0.1) def test_single_estimator_ti(self, workflow): - workflow.estimate(methods='TI') + workflow.estimate(estimators='TI') summary = workflow.generate_result() assert np.isclose(summary['TI']['Stages']['TOTAL'], 2.946, 0.1) @@ -329,11 +342,11 @@ def workflow(tmp_path_factory): dir, _ = os.path.split( os.path.dirname(load_bace_example()['data']['complex']['vdw'][0])) - workflow = ABFE(units='kcal/mol', software='Amber', dir=dir, + workflow = ABFE(units='kcal/mol', software='AMBER', dir=dir, prefix='ti', suffix='bz2', T=310, outdirectory=str( outdir)) workflow.read() - workflow.estimate(methods='TI') + workflow.estimate(estimators='TI') return workflow def test_summary(self, workflow): diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 229197c0..b9265169 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -29,6 +29,8 @@ class ABFE(WorkflowBase): Parameters ---------- + T : float + Temperature in K. units : str The unit used for printing and plotting results. {'kcal/mol', 'kJ/mol', 'kT'}. Default: 'kT'. @@ -43,8 +45,6 @@ class ABFE(WorkflowBase): Prefix for datafile sets. Default: 'dhdl'. suffix : str Suffix for datafile sets. Default: 'xvg'. - T : float - Temperature in K. Default: 298. outdirectory : str Directory in which the output files produced by this script will be stored. Default: os.path.curdir. @@ -58,8 +58,8 @@ class ABFE(WorkflowBase): file_list : list The list of filenames sorted by the lambda state. 
''' - def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, - prefix='dhdl', suffix='xvg', T=298, + def __init__(self, T, units='kT', software='GROMACS', dir=os.path.curdir, + prefix='dhdl', suffix='xvg', outdirectory=os.path.curdir, ignore_warnings=False): @@ -83,11 +83,11 @@ def __init__(self, units='kT', software='Gromacs', dir=os.path.curdir, self.logger.info("Unsorted file list: \n %s", '\n'.join( self.file_list)) - if software.lower() == 'gromacs': + if software == 'GROMACS': self.logger.info(f'Using {software} parser to read the data.') self._extract_u_nk = gmx.extract_u_nk self._extract_dHdl = gmx.extract_dHdl - elif software.lower() == 'amber': + elif software == 'AMBER': self._extract_u_nk = amber.extract_u_nk self._extract_dHdl = amber.extract_dHdl else: @@ -112,8 +112,8 @@ def read(self): self.logger.info( f'Reading {len(u_nk)} lines of u_nk from {file}') u_nk_list.append(u_nk) - except: # pragma: no cover - msg = f'Error reading read u_nk from {file}.' + except Exception as exc: # pragma: no cover + msg = f'Error reading read u_nk from {file}.\n{exc}' if self.ignore_warnings: self.logger.exception(msg + 'This exception is being ignored because ignore_warnings=True.') @@ -126,8 +126,8 @@ def read(self): self.logger.info( f'Reading {len(dhdl)} lines of dhdl from {file}') dHdl_list.append(dhdl) - except: # pragma: no cover - msg = f'Error reading read dhdl from {file}.' + except Exception as exc: # pragma: no cover + msg = f'Error reading read dhdl from {file}.\n{exc}' if self.ignore_warnings: self.logger.exception(msg + 'This exception is being ignored because ignore_warnings=True.') @@ -150,7 +150,7 @@ def read(self): self.dHdl_sample_list = None def run(self, skiptime=0, uncorr='dhdl', threshold=50, - methods=('MABR', 'BAR', 'TI'), overlap='O_MBAR.pdf', + estimators=('MBAR', 'BAR', 'TI'), overlap='O_MBAR.pdf', breakdown=True, forwrev=10, *args, **kwargs): ''' The method for running the automatic analysis. 
@@ -167,9 +167,9 @@ def run(self, skiptime=0, uncorr='dhdl', threshold=50, Proceed with correlated samples if the number of uncorrelated samples is found to be less than this number. If 0 is given, the time series analysis will not be performed at all. Default: 50. - methods : str or list of str - A list of the methods to estimate the free energy with. Default: - `('MABR', 'BAR', 'TI')`. + estimators : str or list of str + A list of the estimators to estimate the free energy with. Default: + `('MBAR', 'BAR', 'TI')`. overlap : str The filename for the plot of overlap matrix. Default: 'O_MBAR.pdf'. breakdown : bool @@ -196,8 +196,8 @@ def run(self, skiptime=0, uncorr='dhdl', threshold=50, if uncorr is not None: self.preprocess(skiptime=skiptime, uncorr=uncorr, threshold=threshold) - if methods is not None: - self.estimate(methods) + if estimators is not None: + self.estimate(estimators) self.generate_result() if overlap is not None: @@ -302,36 +302,47 @@ def preprocess(self, skiptime=0, uncorr='dhdl', threshold=50): else: self.logger.info('No dHdl data being subsampled') - def estimate(self, methods=('MBAR', 'BAR', 'TI')): + def estimate(self, estimators=('MBAR', 'BAR', 'TI'), **kwargs): '''Estimate the free energy using the selected estimator. Parameters ---------- - methods : str or list of str - A list of the methods to estimate the free energy with. Default: + estimators : str or list of str + A list of the estimators to estimate the free energy with. Default: ['TI', 'BAR', 'MBAR']. + kwargs : dict + Keyword arguments to be passed to the estimator. + Attributes ---------- estimator : dict The dictionary of estimators. The keys are in ['TI', 'BAR', 'MBAR']. + + Note + ----- + :class:`~alchemlyb.estimators.AutoMBAR` is used when + ``estimators='MBAR'``, supply ``method`` keyword + (:code:`estimate(estimators='MBAR', method='adaptive')`) to restore the + behavior of :class:`~alchemlyb.estimators.MBAR`. 
+ ''' # Make estimators into a tuple - if isinstance(methods, str): - methods = (methods, ) + if isinstance(estimators, str): + estimators = (estimators, ) - for estimator in methods: + for estimator in estimators: if estimator not in (FEP_ESTIMATORS + TI_ESTIMATORS): msg = f"Estimator {estimator} is not available in {FEP_ESTIMATORS + TI_ESTIMATORS}." self.logger.error(msg) raise ValueError(msg) self.logger.info( - f"Start running estimator: {','.join(methods)}.") + f"Start running estimator: {','.join(estimators)}.") self.estimator = {} # Use unprocessed data if preprocess is not performed. - if 'TI' in methods: + if 'TI' in estimators: if self.dHdl_sample_list is not None: dHdl = concat(self.dHdl_sample_list) else: @@ -340,7 +351,7 @@ def estimate(self, methods=('MBAR', 'BAR', 'TI')): self.logger.info( f'A total {len(dHdl)} lines of dHdl is used.') - if 'BAR' in methods or 'MBAR' in methods: + if 'BAR' in estimators or 'MBAR' in estimators: if self.u_nk_sample_list is not None: u_nk = concat(self.u_nk_sample_list) else: @@ -349,16 +360,16 @@ def estimate(self, methods=('MBAR', 'BAR', 'TI')): self.logger.info( f'A total {len(u_nk)} lines of u_nk is used.') - for estimator in methods: + for estimator in estimators: if estimator == 'MBAR': self.logger.info('Run MBAR estimator.') - self.estimator[estimator] = MBAR().fit(u_nk) + self.estimator[estimator] = MBAR(**kwargs).fit(u_nk) elif estimator == 'BAR': self.logger.info('Run BAR estimator.') - self.estimator[estimator] = BAR().fit(u_nk) + self.estimator[estimator] = BAR(**kwargs).fit(u_nk) elif estimator == 'TI': self.logger.info('Run TI estimator.') - self.estimator[estimator] = TI().fit(dHdl) + self.estimator[estimator] = TI(**kwargs).fit(dHdl) def generate_result(self): @@ -607,7 +618,7 @@ def plot_dF_state(self, dF_state='dF_state.pdf', labels=None, colors=None, return fig def check_convergence(self, forwrev, estimator='MBAR', dF_t='dF_t.pdf', - ax=None): + ax=None, **kwargs): '''Compute the forward and backward 
convergence using :func:`~alchemlyb.convergence.forward_backward_convergence`and plot with @@ -626,6 +637,8 @@ def check_convergence(self, forwrev, estimator='MBAR', dF_t='dF_t.pdf', ax : matplotlib.axes.Axes Matplotlib axes object where the plot will be drawn on. If ``ax=None``, a new axes will be generated. + kwargs : dict + Keyword arguments to be passed to the estimator. Attributes ---------- @@ -635,6 +648,13 @@ def check_convergence(self, forwrev, estimator='MBAR', dF_t='dF_t.pdf', ------- matplotlib.axes.Axes An axes with the convergence drawn. + + Note + ----- + :class:`~alchemlyb.estimators.AutoMBAR` is used when + ``estimator='MBAR'``, supply ``method`` keyword + (:code:`check_convergence(10, estimator='MBAR', method='adaptive')`) to + restore the behavior of :class:`~alchemlyb.estimators.MBAR`. ''' self.logger.info('Start convergence analysis.') self.logger.info('Checking data availability.') @@ -653,7 +673,7 @@ def check_convergence(self, forwrev, estimator='MBAR', dF_t='dF_t.pdf', raise ValueError(f'u_nk is needed for the f{estimator} estimator.') convergence = forward_backward_convergence(u_nk_list, estimator=estimator, - num=forwrev) + num=forwrev, **kwargs) elif estimator in TI_ESTIMATORS: self.logger.warning('No valid FEP estimator or dataset found. ' 'Fallback to TI.') @@ -672,7 +692,7 @@ def check_convergence(self, forwrev, estimator='MBAR', dF_t='dF_t.pdf', f'dHdl is needed for the f{estimator} estimator.') convergence = forward_backward_convergence(dHdl_list, estimator=estimator, - num=forwrev) + num=forwrev, **kwargs) else: msg = f"Estimator {estimator} is not supported. Choose one from " \ f"{FEP_ESTIMATORS+TI_ESTIMATORS}." From ab716860b4fdfc24a42b7538e29e6ab68ee204b3 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 22 Aug 2022 12:00:41 +0100 Subject: [PATCH 104/123] update doc. 
--- docs/workflows/alchemlyb.workflows.ABFE.rst | 2 +- src/alchemlyb/workflows/abfe.py | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/workflows/alchemlyb.workflows.ABFE.rst b/docs/workflows/alchemlyb.workflows.ABFE.rst index 748febf7..eff24c26 100644 --- a/docs/workflows/alchemlyb.workflows.ABFE.rst +++ b/docs/workflows/alchemlyb.workflows.ABFE.rst @@ -18,7 +18,7 @@ are to complete workflow. For a GROMACS ABFE simulation, executing the workflow would look similar -to the following code (The log is configured by logger) :: +to the following code (The log is configured by logger). :: >>> from alchemtest.gmx import load_ABFE >>> from alchemlyb.workflows import ABFE diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index b9265169..9685361f 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -323,9 +323,9 @@ def estimate(self, estimators=('MBAR', 'BAR', 'TI'), **kwargs): Note ----- :class:`~alchemlyb.estimators.AutoMBAR` is used when - ``estimators='MBAR'``, supply ``method`` keyword - (:code:`estimate(estimators='MBAR', method='adaptive')`) to restore the + ``estimators='MBAR'``, supply ``method`` keyword to restore the behavior of :class:`~alchemlyb.estimators.MBAR`. + (:code:`estimate(estimators='MBAR', method='adaptive')`) ''' # Make estimators into a tuple @@ -652,9 +652,10 @@ def check_convergence(self, forwrev, estimator='MBAR', dF_t='dF_t.pdf', Note ----- :class:`~alchemlyb.estimators.AutoMBAR` is used when - ``estimator='MBAR'``, supply ``method`` keyword - (:code:`check_convergence(10, estimator='MBAR', method='adaptive')`) to - restore the behavior of :class:`~alchemlyb.estimators.MBAR`. + ``estimator='MBAR'``, supply ``method`` keyword to restore the behavior + of :class:`~alchemlyb.estimators.MBAR`. 
+ (:code:`check_convergence(10, estimator='MBAR', method='adaptive')`) + ''' self.logger.info('Start convergence analysis.') self.logger.info('Checking data availability.') From c246e9c4783e7e3a15b7ac5605882e2ee97927c4 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 22 Aug 2022 12:04:40 +0100 Subject: [PATCH 105/123] fix docs --- src/alchemlyb/convergence/convergence.py | 8 ++++++++ src/alchemlyb/workflows/abfe.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index 9a7c71cd..6ae17c35 100644 --- a/src/alchemlyb/convergence/convergence.py +++ b/src/alchemlyb/convergence/convergence.py @@ -48,6 +48,14 @@ def forward_backward_convergence(df_list, estimator='MBAR', num=10, **kwargs): 9 3.044149 0.016405 3.044385 0.016402 1.0 + Note + ----- + :class:`~alchemlyb.estimators.AutoMBAR` is used when ``estimator='MBAR'``, + supply ``method`` keyword to restore the behavior of + :class:`~alchemlyb.estimators.MBAR`. + (:code:`forward_backward_convergence(u_nk, 'MBAR', num=2, method='adaptive')`) + + .. versionadded:: 0.6.0 .. versionchanged:: 1.0.0 The ``estimator`` accepts uppercase input. diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 9685361f..f7e53710 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -655,7 +655,7 @@ def check_convergence(self, forwrev, estimator='MBAR', dF_t='dF_t.pdf', ``estimator='MBAR'``, supply ``method`` keyword to restore the behavior of :class:`~alchemlyb.estimators.MBAR`. 
(:code:`check_convergence(10, estimator='MBAR', method='adaptive')`) - + ''' self.logger.info('Start convergence analysis.') self.logger.info('Checking data availability.') From c35ca7a06a267dcfd4458c4288e9b2cdd613eccd Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Fri, 2 Sep 2022 18:41:24 +0100 Subject: [PATCH 106/123] Update docs/workflows/alchemlyb.workflows.ABFE.rst Co-authored-by: Oliver Beckstein --- docs/workflows/alchemlyb.workflows.ABFE.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/workflows/alchemlyb.workflows.ABFE.rst b/docs/workflows/alchemlyb.workflows.ABFE.rst index eff24c26..2130f145 100644 --- a/docs/workflows/alchemlyb.workflows.ABFE.rst +++ b/docs/workflows/alchemlyb.workflows.ABFE.rst @@ -33,7 +33,7 @@ to the following code (The log is configured by logger). :: >>> workflow = ABFE(units='kcal/mol', software='Gromacs', dir=dir, >>> prefix='dhdl', suffix='xvg', T=298, outdirectory='./') >>> workflow.run(skiptime=10, uncorr='dhdl', threshold=50, - >>> methods=('mbar', 'bar', 'ti'), overlap='O_MBAR.pdf', + >>> methods=('MBAR', 'BAR', 'TI'), overlap='O_MBAR.pdf', >>> breakdown=True, forwrev=10) From 9f17cbc3521e1a37985215de5734171925d6d7ce Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Fri, 2 Sep 2022 18:41:41 +0100 Subject: [PATCH 107/123] Update src/alchemlyb/convergence/convergence.py Co-authored-by: Oliver Beckstein --- src/alchemlyb/convergence/convergence.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index 6ae17c35..9acc719b 100644 --- a/src/alchemlyb/convergence/convergence.py +++ b/src/alchemlyb/convergence/convergence.py @@ -25,6 +25,10 @@ def forward_backward_convergence(df_list, estimator='MBAR', num=10, **kwargs): List of DataFrame of either dHdl or u_nk. estimator : {'MBAR', 'BAR', 'TI'} Name of the estimators. + See the important note below on the use of "MBAR". + + .. 
deprecated:: 1.0.0 + Lower case input is also accepted until release 2.0.0. num : int The number of time points. kwargs : dict From cc3337468c6c0302bcbef1507015b6427ef33824 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Fri, 2 Sep 2022 18:48:41 +0100 Subject: [PATCH 108/123] Update src/alchemlyb/convergence/convergence.py Co-authored-by: Oliver Beckstein --- src/alchemlyb/convergence/convergence.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index 9acc719b..bc6bbdf3 100644 --- a/src/alchemlyb/convergence/convergence.py +++ b/src/alchemlyb/convergence/convergence.py @@ -62,7 +62,9 @@ def forward_backward_convergence(df_list, estimator='MBAR', num=10, **kwargs): .. versionadded:: 0.6.0 .. versionchanged:: 1.0.0 - The ``estimator`` accepts uppercase input. + The ``estimator`` accepts uppercase input. + The default for using ``estimator='MBAR'`` was changed from + :class:`~alchemlyb.estimators.MBAR` to :class:`~alchemlyb.estimators.AutoMBAR`. ''' logger = logging.getLogger('alchemlyb.convergence.' From f840d2759346b801e5559ddca42f069dce6ae9d8 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Fri, 2 Sep 2022 18:48:52 +0100 Subject: [PATCH 109/123] Update src/alchemlyb/estimators/mbar_.py Co-authored-by: Oliver Beckstein --- src/alchemlyb/estimators/mbar_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py index 729ced48..973adbbd 100644 --- a/src/alchemlyb/estimators/mbar_.py +++ b/src/alchemlyb/estimators/mbar_.py @@ -156,7 +156,7 @@ class AutoMBAR(MBAR): ---------- method : str, optional, default=None - The optimization routine to use. This parameter is default to None. + The optimization routine to use. This parameter defaults to ``None``. When a specific method is set, AutoMBAR will behave in the same ways as MBAR. 
From 5070fe6082c7e85661a38c81038fa910433b693f Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Fri, 2 Sep 2022 18:49:07 +0100 Subject: [PATCH 110/123] Update src/alchemlyb/estimators/mbar_.py Co-authored-by: Oliver Beckstein --- src/alchemlyb/estimators/mbar_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py index 973adbbd..3e8a7618 100644 --- a/src/alchemlyb/estimators/mbar_.py +++ b/src/alchemlyb/estimators/mbar_.py @@ -157,7 +157,7 @@ class AutoMBAR(MBAR): method : str, optional, default=None The optimization routine to use. This parameter defaults to ``None``. - When a specific method is set, AutoMBAR will behave in the same ways + When a specific method is set, AutoMBAR will behave in the same way as MBAR. Note From f7c7c21c617faca77d4c137d332f0eeb39c9d6a3 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Fri, 2 Sep 2022 18:49:32 +0100 Subject: [PATCH 111/123] Update src/alchemlyb/estimators/mbar_.py Co-authored-by: Oliver Beckstein --- src/alchemlyb/estimators/mbar_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py index 3e8a7618..a1b43b5e 100644 --- a/src/alchemlyb/estimators/mbar_.py +++ b/src/alchemlyb/estimators/mbar_.py @@ -172,7 +172,7 @@ class AutoMBAR(MBAR): .. versionadded:: 0.6.0 .. versionchanged:: 1.0.0 - The AutoMBAR accepts method argument. + AutoMBAR accepts the `method` argument. 
""" def __init__(self, maximum_iterations=10000, relative_tolerance=1.0e-7, initial_f_k=None, verbose=False, method=None): From 30bccd32cb5ef965d07d98980fbaf04278bf4bf5 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Fri, 2 Sep 2022 18:50:19 +0100 Subject: [PATCH 112/123] Update src/alchemlyb/estimators/mbar_.py Co-authored-by: Oliver Beckstein --- src/alchemlyb/estimators/mbar_.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/alchemlyb/estimators/mbar_.py b/src/alchemlyb/estimators/mbar_.py index a1b43b5e..832fde09 100644 --- a/src/alchemlyb/estimators/mbar_.py +++ b/src/alchemlyb/estimators/mbar_.py @@ -160,6 +160,9 @@ class AutoMBAR(MBAR): When a specific method is set, AutoMBAR will behave in the same way as MBAR. + .. versionadded:: 1.0.0 + + Note ---- All arguments are described under :class:`MBAR` except that the solver method From fa4c4bfa97b5765e1174015cf352f97e54c55c73 Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Fri, 2 Sep 2022 19:39:01 +0100 Subject: [PATCH 113/123] update changes --- CHANGES | 3 +++ src/alchemlyb/tests/test_workflow_ABFE.py | 16 ++++++++++++++++ src/alchemlyb/workflows/abfe.py | 20 ++++++++++++++++---- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/CHANGES b/CHANGES index 50e1858d..c8d75da8 100644 --- a/CHANGES +++ b/CHANGES @@ -19,6 +19,9 @@ The rules for this file: Changes - Drop support for py3.7 (Issue #179, PR #214) + - forward_backward_convergence will use AutoMBAR as backend when `MBAR` is + selected as the estimator (PR #114). + - AutoMBAR accepts the `method` argument (PR #114). Enhancements - Add a base class for workflows (PR #188). 
diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index 6ce1ad13..6c742e4d 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -273,6 +273,22 @@ def test_run_none(self, workflow): workflow.run(uncorr=None, estimators=None, overlap=None, breakdown=None, forwrev=None) + def test_read_invalid_u_nk(self, workflow, monkeypatch): + def extract_u_nk(self, T): + raise ValueError + monkeypatch.setattr(workflow, '_extract_u_nk', + extract_u_nk) + with pytest.raises(ValueError, match='Error reading read u_nk from'): + workflow.read() + + def test_read_invalid_dHdl(self, workflow, monkeypatch): + def extract_dHdl(self, T): + raise ValueError + monkeypatch.setattr(workflow, '_extract_dHdl', + extract_dHdl) + with pytest.raises(ValueError, match='Error reading read dhdl from'): + workflow.read() + def test_uncorr_threshold(self, workflow, monkeypatch): '''Test if the full data will be used when the number of data points are less than the threshold.''' diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index f7e53710..6a8f4441 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -151,7 +151,7 @@ def read(self): def run(self, skiptime=0, uncorr='dhdl', threshold=50, estimators=('MBAR', 'BAR', 'TI'), overlap='O_MBAR.pdf', - breakdown=True, forwrev=10, *args, **kwargs): + breakdown=True, forwrev=None, *args, **kwargs): ''' The method for running the automatic analysis. Parameters @@ -180,7 +180,11 @@ def run(self, skiptime=0, uncorr='dhdl', threshold=50, Plot the free energy change as a function of time in both directions, with the specified number of points in the time plot. The number of time points (an integer) must be provided. Specify as ``None`` will not do - the convergence analysis. Default: 10. + the convergence analysis. Default: None. By default, 'MBAR' + estimator will be used for convergence analysis. 
If the dataset + does not contain u_nk, please run + meth:`~alchemlyb.workflows.ABFE.check_convergence` manually + with estimator='TI'. Attributes ---------- @@ -490,6 +494,8 @@ def generate_result(self): end = states.index(lambda_max) self.logger.info( f'Stage {stage} is from state {start} to state {end}.') + # This assumes that the indexes are sorted as the + # preprocessing should sort the index of the df. result = delta_f_.iloc[start, end] if estimator_name != 'BAR': error = d_delta_f_.iloc[start, end] @@ -501,6 +507,8 @@ def generate_result(self): data_dict[estimator_name + '_Error'].append(error) # Total result + # This assumes that the indexes are sorted as the + # preprocessing should sort the index of the df. result = delta_f_.iloc[0, -1] if estimator_name != 'BAR': error = d_delta_f_.iloc[0, -1] @@ -670,8 +678,12 @@ def check_convergence(self, forwrev, estimator='MBAR', dF_t='dF_t.pdf', self.logger.info('Subsampled u_nk not available, ' 'use original data instead.') else: - self.logger.error(f'u_nk is needed for the f{estimator} estimator.') - raise ValueError(f'u_nk is needed for the f{estimator} estimator.') + msg = f"u_nk is needed for the f{estimator} estimator. " \ + f"If the dataset only has dHdl, " \ + f"run ABFE.check_convergence(estimator='TI') to " \ + f"use a TI estimator." 
+ self.logger.error(msg) + raise ValueError(msg) convergence = forward_backward_convergence(u_nk_list, estimator=estimator, num=forwrev, **kwargs) From 6cf8895b77f3b94ad2ce9c32f328270456d8e37b Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Fri, 2 Sep 2022 19:39:42 +0100 Subject: [PATCH 114/123] no cover --- src/alchemlyb/workflows/abfe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 6a8f4441..95441643 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -112,7 +112,7 @@ def read(self): self.logger.info( f'Reading {len(u_nk)} lines of u_nk from {file}') u_nk_list.append(u_nk) - except Exception as exc: # pragma: no cover + except Exception as exc: msg = f'Error reading read u_nk from {file}.\n{exc}' if self.ignore_warnings: self.logger.exception(msg + @@ -126,7 +126,7 @@ def read(self): self.logger.info( f'Reading {len(dhdl)} lines of dhdl from {file}') dHdl_list.append(dhdl) - except Exception as exc: # pragma: no cover + except Exception as exc: msg = f'Error reading read dhdl from {file}.\n{exc}' if self.ignore_warnings: self.logger.exception(msg + From 8a3178769b728e198f002d2c8824c430205f2857 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Fri, 2 Sep 2022 21:08:11 +0100 Subject: [PATCH 115/123] Update CHANGES Co-authored-by: Oliver Beckstein --- CHANGES | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index c8d75da8..2af55bc1 100644 --- a/CHANGES +++ b/CHANGES @@ -20,7 +20,7 @@ The rules for this file: Changes - Drop support for py3.7 (Issue #179, PR #214) - forward_backward_convergence will use AutoMBAR as backend when `MBAR` is - selected as the estimator (PR #114). + selected as the estimator (PR #114). - AutoMBAR accepts the `method` argument (PR #114). 
Enhancements From be55f591f67eb2337781df0723c5a5c4985299c6 Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Fri, 2 Sep 2022 21:43:24 +0100 Subject: [PATCH 116/123] update --- src/alchemlyb/workflows/abfe.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 95441643..e6cef074 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -113,13 +113,13 @@ def read(self): f'Reading {len(u_nk)} lines of u_nk from {file}') u_nk_list.append(u_nk) except Exception as exc: - msg = f'Error reading read u_nk from {file}.\n{exc}' + msg = f'Error reading read u_nk from {file}.' if self.ignore_warnings: - self.logger.exception(msg + + self.logger.exception(msg + f'\n{exc}\n' + 'This exception is being ignored because ignore_warnings=True.') else: self.logger.error(msg) - raise ValueError(msg) + raise ValueError(msg) from exc try: dhdl = self._extract_dHdl(file, T=self.T) @@ -127,13 +127,13 @@ def read(self): f'Reading {len(dhdl)} lines of dhdl from {file}') dHdl_list.append(dhdl) except Exception as exc: - msg = f'Error reading read dhdl from {file}.\n{exc}' + msg = f'Error reading read dhdl from {file}.' if self.ignore_warnings: - self.logger.exception(msg + + self.logger.exception(msg + f'\n{exc}\n' + 'This exception is being ignored because ignore_warnings=True.') else: self.logger.error(msg) - raise ValueError(msg) + raise ValueError(msg) from exc # Sort the files according to the state self.logger.info('Sort files according to the u_nk.') @@ -181,8 +181,9 @@ def run(self, skiptime=0, uncorr='dhdl', threshold=50, with the specified number of points in the time plot. The number of time points (an integer) must be provided. Specify as ``None`` will not do the convergence analysis. Default: None. By default, 'MBAR' - estimator will be used for convergence analysis. 
If the dataset - does not contain u_nk, please run + estimator will be used for convergence analysis, as it is + usually the fastest converging method. If the dataset does not + contain u_nk, please run meth:`~alchemlyb.workflows.ABFE.check_convergence` manually with estimator='TI'. From e6752c9d3a4dbe6e41154916d8c887ea9306fa23 Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Sat, 3 Sep 2022 10:04:05 +0100 Subject: [PATCH 117/123] update --- src/alchemlyb/workflows/abfe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index e6cef074..d1ac206a 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -119,7 +119,7 @@ def read(self): 'This exception is being ignored because ignore_warnings=True.') else: self.logger.error(msg) - raise ValueError(msg) from exc + raise exc from ValueError(msg) try: dhdl = self._extract_dHdl(file, T=self.T) @@ -133,7 +133,7 @@ def read(self): 'This exception is being ignored because ignore_warnings=True.') else: self.logger.error(msg) - raise ValueError(msg) from exc + raise exc from ValueError(msg) # Sort the files according to the state self.logger.info('Sort files according to the u_nk.') From 282e98d3ab538e6703cf629e926fb1924e2880ff Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Sat, 3 Sep 2022 10:13:29 +0100 Subject: [PATCH 118/123] fix test --- src/alchemlyb/tests/test_workflow_ABFE.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index 6c742e4d..6614e57d 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -275,18 +275,18 @@ def test_run_none(self, workflow): def test_read_invalid_u_nk(self, workflow, monkeypatch): def extract_u_nk(self, T): - raise ValueError + raise IOError('Error read u_nk.') monkeypatch.setattr(workflow, 
'_extract_u_nk', extract_u_nk) - with pytest.raises(ValueError, match='Error reading read u_nk from'): + with pytest.raises(IOError, match='Error read u_nk.'): workflow.read() def test_read_invalid_dHdl(self, workflow, monkeypatch): def extract_dHdl(self, T): - raise ValueError + raise IOError('Error read dHdl.') monkeypatch.setattr(workflow, '_extract_dHdl', extract_dHdl) - with pytest.raises(ValueError, match='Error reading read dhdl from'): + with pytest.raises(IOError, match='Error read dHdl.'): workflow.read() def test_uncorr_threshold(self, workflow, monkeypatch): From 594bb3311d2b4ba4863bc9ee84bcfd60b5c8980b Mon Sep 17 00:00:00 2001 From: "William (Zhiyi) Wu" Date: Sat, 3 Sep 2022 10:16:52 +0100 Subject: [PATCH 119/123] changes --- CHANGES | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 2af55bc1..4a2ebdb2 100644 --- a/CHANGES +++ b/CHANGES @@ -13,7 +13,7 @@ The rules for this file: * release numbers follow "Semantic Versioning" https://semver.org ------------------------------------------------------------------------------ -??/??/2022 orbeckst +??/??/2022 orbeckst, xiki-tempula * 1.0.0 From 77ce671c1a8790fde82e498b0dd7effefa40e435 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Mon, 5 Sep 2022 17:15:49 -0700 Subject: [PATCH 120/123] raise OSError from original_exception in ABFE.read() --- src/alchemlyb/workflows/abfe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index d1ac206a..31e909ec 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -119,7 +119,7 @@ def read(self): 'This exception is being ignored because ignore_warnings=True.') else: self.logger.error(msg) - raise exc from ValueError(msg) + raise OSError(msg) from exc try: dhdl = self._extract_dHdl(file, T=self.T) @@ -133,7 +133,7 @@ def read(self): 'This exception is being ignored because ignore_warnings=True.') else: 
self.logger.error(msg) - raise exc from ValueError(msg) + raise OSError(msg) from exc # Sort the files according to the state self.logger.info('Sort files according to the u_nk.') From ed3871dd0ba0d5503d266d564a8e9cbab4984a1e Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Mon, 5 Sep 2022 17:51:32 -0700 Subject: [PATCH 121/123] fixed tests for OSError exceptions --- src/alchemlyb/tests/test_workflow_ABFE.py | 6 ++++-- src/alchemlyb/workflows/abfe.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index 6614e57d..272ca9eb 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -278,7 +278,8 @@ def extract_u_nk(self, T): raise IOError('Error read u_nk.') monkeypatch.setattr(workflow, '_extract_u_nk', extract_u_nk) - with pytest.raises(IOError, match='Error read u_nk.'): + with pytest.raises(OSError, + match=r'Error reading u_nk .*/dhdl\.xvg\.bz2'): workflow.read() def test_read_invalid_dHdl(self, workflow, monkeypatch): @@ -286,7 +287,8 @@ def extract_dHdl(self, T): raise IOError('Error read dHdl.') monkeypatch.setattr(workflow, '_extract_dHdl', extract_dHdl) - with pytest.raises(IOError, match='Error read dHdl.'): + with pytest.raises(OSError, + match=r'Error reading dHdl .*/dhdl\.xvg\.bz2'): workflow.read() def test_uncorr_threshold(self, workflow, monkeypatch): diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index 31e909ec..00e3f76a 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -113,7 +113,7 @@ def read(self): f'Reading {len(u_nk)} lines of u_nk from {file}') u_nk_list.append(u_nk) except Exception as exc: - msg = f'Error reading read u_nk from {file}.' + msg = f'Error reading u_nk from {file}.' 
if self.ignore_warnings: self.logger.exception(msg + f'\n{exc}\n' + 'This exception is being ignored because ignore_warnings=True.') @@ -127,7 +127,7 @@ def read(self): f'Reading {len(dhdl)} lines of dhdl from {file}') dHdl_list.append(dhdl) except Exception as exc: - msg = f'Error reading read dhdl from {file}.' + msg = f'Error reading dHdl from {file}.' if self.ignore_warnings: self.logger.exception(msg + f'\n{exc}\n' + 'This exception is being ignored because ignore_warnings=True.') From d6d7b9fe79f3b232a454ebeff44b1f1264c273ed Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Mon, 5 Sep 2022 17:59:57 -0700 Subject: [PATCH 122/123] use upper case estimators in tests --- src/alchemlyb/tests/test_convergence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alchemlyb/tests/test_convergence.py b/src/alchemlyb/tests/test_convergence.py index 2107bc8a..6520713c 100644 --- a/src/alchemlyb/tests/test_convergence.py +++ b/src/alchemlyb/tests/test_convergence.py @@ -49,7 +49,7 @@ def test_convergence_bar(gmx_benzene): def test_convergence_wrong_estimator(gmx_benzene): dHdl, u_nk = gmx_benzene with pytest.raises(ValueError, match="is not available in"): - forward_backward_convergence(u_nk, 'www') + forward_backward_convergence(u_nk, 'WWW') def test_convergence_wrong_cases(gmx_benzene): dHdl, u_nk = gmx_benzene From ef4a8d72886d162b63f91ce74e4782cb42ba573b Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Mon, 5 Sep 2022 18:02:42 -0700 Subject: [PATCH 123/123] fix test for windows, too --- src/alchemlyb/tests/test_workflow_ABFE.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index 272ca9eb..7c4807c1 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -279,7 +279,7 @@ def extract_u_nk(self, T): monkeypatch.setattr(workflow, '_extract_u_nk', extract_u_nk) with pytest.raises(OSError, 
- match=r'Error reading u_nk .*/dhdl\.xvg\.bz2'): + match=r'Error reading u_nk .*dhdl\.xvg\.bz2'): workflow.read() def test_read_invalid_dHdl(self, workflow, monkeypatch): @@ -288,7 +288,7 @@ def extract_dHdl(self, T): monkeypatch.setattr(workflow, '_extract_dHdl', extract_dHdl) with pytest.raises(OSError, - match=r'Error reading dHdl .*/dhdl\.xvg\.bz2'): + match=r'Error reading dHdl .*dhdl\.xvg\.bz2'): workflow.read() def test_uncorr_threshold(self, workflow, monkeypatch):