Skip to content

Commit

Permalink
Add gather docstrings (#1041)
Browse files Browse the repository at this point in the history
  • Loading branch information
atravitz authored Dec 9, 2024
1 parent 55c9924 commit 5e198a1
Showing 1 changed file with 86 additions and 20 deletions.
106 changes: 86 additions & 20 deletions openfecli/commands/gather.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,29 @@
# For details, see https://github.com/OpenFreeEnergy/openfe

import click
from openfecli import OFECommandPlugin
from openfecli.clicktypes import HyphenAwareChoice
import os
import pathlib
from typing import Callable, Literal
import warnings

from openfecli import OFECommandPlugin
from openfecli.clicktypes import HyphenAwareChoice


def _get_column(val):
def _get_column(val:float|int)->int:
"""Determine the index (where the 0th index is the decimal) at which the
first non-zero value occurs in a full-precision string representation of a value.
Parameters
----------
val : float|int
The raw value.
Returns
-------
int
Column index
"""
import numpy as np
if val == 0:
return 0
Expand All @@ -27,6 +43,23 @@ def format_estimate_uncertainty(
unc: float,
unc_prec: int = 1,
) -> tuple[str, str]:
"""Truncate raw estimate and uncertainty values to the appropriate uncertainty.
Parameters
----------
est : float
Raw estimate value.
unc : float
Raw uncertainty value.
unc_prec : int, optional
Precision, by default 1
Returns
-------
tuple[str, str]
The truncated raw and uncertainty values.
"""

import numpy as np
# get the last column needed for uncertainty
unc_col = _get_column(unc) - (unc_prec - 1)
Expand All @@ -41,21 +74,45 @@ def format_estimate_uncertainty(
return est_str, unc_str


def is_results_json(f):
# sanity check on files before we try and deserialize
return 'estimate' in open(f, 'r').read(20)
def is_results_json(fpath:os.PathLike|str)->bool:
"""Sanity check that file is a result json before we try to deserialize"""
return 'estimate' in open(fpath, 'r').read(20)


def load_results(f):
# path to deserialized results
def load_results(fpath:os.PathLike|str)->dict:
"""Load the data from a results JSON into a dict
Parameters
----------
fpath : os.PathLike | str
The path to deserialized results.
Returns
-------
dict
A dict containing data from the results JSON.
"""

import json
from gufe.tokenization import JSON_HANDLER

return json.load(open(f, 'r'), cls=JSON_HANDLER.decoder)
return json.load(open(fpath, 'r'), cls=JSON_HANDLER.decoder)


def get_names(result:dict) -> tuple[str, str]:
"""Get the ligand names from a unit's results data.
Parameters
----------
result : dict
A results dict.
Returns
-------
tuple[str, str]
Ligand names corresponding to the results.
"""

def get_names(result) -> tuple[str, str]:
# Result to tuple of ligand names
nm = list(result['unit_results'].values())[0]['name']
toks = nm.split()
if toks[2] == 'repeat':
Expand All @@ -64,7 +121,10 @@ def get_names(result) -> tuple[str, str]:
return toks[0], toks[2]


def get_type(res):
def get_type(res:dict)->Literal['vacuum','solvent','complex']:
"""Determine the simulation type based on the component names."""
# TODO: use component *types* instead here

list_of_pur = list(res['protocol_result']['data'].values())[0]
pur = list_of_pur[0]
components = pur['inputs']['stateA']['components']
Expand All @@ -77,7 +137,9 @@ def get_type(res):
return 'solvent'


def legacy_get_type(res_fn):
def legacy_get_type(res_fn:os.PathLike|str)->Literal['vacuum','solvent','complex']:
# TODO: Deprecate this when we no longer rely on key names in `get_type()`

if 'solvent' in res_fn:
return 'solvent'
elif 'vacuum' in res_fn:
Expand All @@ -86,7 +148,7 @@ def legacy_get_type(res_fn):
return 'complex'


def _generate_bad_legs_error_message(set_vals, ligpair):
def _generate_bad_legs_error_message(set_vals:set, ligpair)->str:
expected_rbfe = {'complex', 'solvent'}
expected_rhfe = {'solvent', 'vacuum'}
maybe_rhfe = bool(set_vals & expected_rhfe)
Expand Down Expand Up @@ -139,7 +201,7 @@ def _parse_raw_units(results: dict) -> list[tuple]:
for pu in list_of_pur]


def _get_ddgs(legs, error_on_missing=True):
def _get_ddgs(legs:dict, error_on_missing=True):
import numpy as np
DDGs = []
for ligpair, vals in sorted(legs.items()):
Expand Down Expand Up @@ -179,7 +241,7 @@ def _get_ddgs(legs, error_on_missing=True):
return DDGs


def _write_ddg(legs, writer, allow_partial):
def _write_ddg(legs:dict, writer:Callable, allow_partial:bool):
DDGs = _get_ddgs(legs, error_on_missing=not allow_partial)
writer.writerow(["ligand_i", "ligand_j", "DDG(i->j) (kcal/mol)",
"uncertainty (kcal/mol)"])
Expand All @@ -192,7 +254,7 @@ def _write_ddg(legs, writer, allow_partial):
writer.writerow([ligA, ligB, DDGhyd, hyd_unc])


def _write_raw(legs, writer, allow_partial=True):
def _write_raw(legs:dict, writer:Callable, allow_partial=True):
writer.writerow(["leg", "ligand_i", "ligand_j",
"DG(i->j) (kcal/mol)", "MBAR uncertainty (kcal/mol)"])

Expand All @@ -206,7 +268,7 @@ def _write_raw(legs, writer, allow_partial=True):
writer.writerow([simtype, *ligpair, m, u])


def _write_dg_raw(legs, writer, allow_partial): # pragma: no-cover
def _write_dg_raw(legs:dict, writer:Callable, allow_partial): # pragma: no-cover
writer.writerow(["leg", "ligand_i", "ligand_j", "DG(i->j) (kcal/mol)",
"uncertainty (kcal/mol)"])
for ligpair, vals in sorted(legs.items()):
Expand All @@ -218,7 +280,7 @@ def _write_dg_raw(legs, writer, allow_partial): # pragma: no-cover
writer.writerow([simtype, *ligpair, m, u])


def _write_dg_mle(legs, writer, allow_partial):
def _write_dg_mle(legs:dict, writer:Callable, allow_partial:bool):
import networkx as nx
import numpy as np
from cinnabar.stats import mle
Expand Down Expand Up @@ -299,7 +361,11 @@ def _write_dg_mle(legs, writer, allow_partial):
"(Skip those edges and issue warning instead.)"
)
)
def gather(rootdir, output, report, allow_partial):
def gather(rootdir:os.PathLike|str,
output:os.PathLike|str,
report:Literal['dg','ddg','raw'],
allow_partial:bool
):
"""Gather simulation result jsons of relative calculations to a tsv file
This walks ROOTDIR recursively and finds all result JSON files from the
Expand Down

0 comments on commit 5e198a1

Please sign in to comment.