diff --git a/.gitignore b/.gitignore index ef580db..77c9e9d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ __pycache__ +pip-wheel-metadata *.pyc package.json .vscode diff --git a/CHANGELOG.md b/CHANGELOG.md index c2fecbf..8fbcd17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Update log +### Version 1.7.0 | 2022-02-16 +#### Features +- Added Gold docking backend +- Added OpenFF parametrisation for GROMACS simulations +#### Internal +- Refactor MD system parametrisation and GROMACS topology handling +- Refactor step dispatching to allow for cleaner parallelization +- Various bug fixes and stability improvements + ### Version 1.6.0 | 2022-02-04 #### Internal - Unit tests fixed. diff --git a/environment_min.yml b/environment_min.yml index 193a24f..1202336 100644 --- a/environment_min.yml +++ b/environment_min.yml @@ -19,6 +19,7 @@ dependencies: - openbabel>=3 - rdkit>=2021.09.2 - acpype>=2022 + - openff-toolkit>= 0.10.2 - pip: - black - regex diff --git a/examples/workflow/docking/active_learning_docking.json b/examples/workflow/docking/active_learning_docking.json index 361fd36..9f0a9b8 100644 --- a/examples/workflow/docking/active_learning_docking.json +++ b/examples/workflow/docking/active_learning_docking.json @@ -19,7 +19,6 @@ "running_mode": "active_learning", "virtual_lib": "/lib.sdf", "validation_lib": "/val.sdf", - "activity_threshold": -7, "n_rounds": 10, "init_samples": 256, "batch_size" : 128, diff --git a/examples/workflow/docking/gold_docking.json b/examples/workflow/docking/gold_docking.json new file mode 100644 index 0000000..2a1f8da --- /dev/null +++ b/examples/workflow/docking/gold_docking.json @@ -0,0 +1,108 @@ +{ + "workflow": { + "header": { + "workflow_id": "Gold docking", + "description": "Runs docking using CCDC's GOLD and a pre-defined receptor file.", + "environment": { + "export": [ + ] + }, + "global_variables": { + "smiles": "another_mol:Nc1ccc(cc1N)C(F)(F)F;failure:CXXC;aspirin:O=C(C)Oc1ccccc1C(=O)O", + 
"cavity_mol2_path": "{package_dir}/../IcolosData/molecules/1UYD/PU8_reference_ligand.mol2", + "receptor_path": "{package_dir}/../IcolosData/Gold/1UYD_protein.mol2" + } + }, + "steps": [{ + "step_id": "rdkit_embedding", + "type": "embedding", + "settings": { + "arguments": { + "flags": ["-epik"], + "parameters": { + "protonate": true, + "method": "rdkit" + } + }, + "additional": { + } + }, + "input": { + "compounds": [{ + "source": "{smiles}", + "source_type": "string" + } + ] + } + }, { + "step_id": "Gold", + "type": "gold_docking", + "execution": { + "prefix_execution": "module load ccdc", + "parallelization": { + "cores": 4 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + } + }, + "additional": { + "configuration": { + "AUTOMATIC SETTINGS": { + "autoscale": 0.5 + }, + "FLOOD FILL": { + "cavity_file": "{cavity_mol2_path}" + }, + "PROTEIN DATA": { + "protein_datafile": "{receptor_path}" + } + }, + "grid_ids": ["1UYD"] + } + }, + "input": { + "compounds": [{ + "source": "rdkit_embedding", + "source_type": "step" + } + ] + }, + "writeout": [ + { + "compounds": { + "category": "conformers" + }, + "destination": { + "resource": "{package_dir}/tests/junk/gold_docked_conformers.sdf", + "type": "file", + "format": "SDF" + } + }, + { + "compounds": { + "category": "conformers", + "selected_tags": ["docking_score", "grid_id"], + "aggregation": { + "mode": "best_per_compound", + "key": "docking_score", + "highest_is_best": true + } + }, + "destination": { + "resource": "{package_dir}/tests/junk/gold_docked_conformers.csv", + "type": "file", + "format": "CSV" + } + } + ] + } + ] + } +} \ No newline at end of file diff --git a/examples/workflow/gromacs/gromacs_ensemble_mmgbsa.json b/examples/workflow/gromacs/gromacs_ensemble_mmgbsa.json index 755bd52..5b4adb3 100644 --- a/examples/workflow/gromacs/gromacs_ensemble_mmgbsa.json +++ b/examples/workflow/gromacs/gromacs_ensemble_mmgbsa.json @@ -69,15 +69,8 @@ } 
}, "additional": {} - }, - "input": { - "generic": [ - { - "source": "01_pdb2gmx", - "extension": "gro" - } - ] } + }, { "step_id": "03_solvate", @@ -93,19 +86,8 @@ } }, "additional": {} - }, - "input": { - "generic": [ - { - "source": "02_editconf", - "extension": "gro" - }, - { - "source": "01_pdb2gmx", - "extension": "top" - } - ] } + }, { "step_id": "04_grompp", @@ -124,21 +106,9 @@ }, "input": { "generic": [ - { - "source": "03_solvate", - "extension": "gro" - }, { "source": "{file_base}/ions.mdp", "extension": "mdp" - }, - { - "source": "03_solvate", - "extension": "top" - }, - { - "source": "01_pdb2gmx", - "extension": "itp" } ] } @@ -168,14 +138,6 @@ { "source": "04_grompp", "extension": "tpr" - }, - { - "source": "04_grompp", - "extension": "top" - }, - { - "source": "04_grompp", - "extension": "itp" } ] } @@ -201,21 +163,10 @@ }, "input": { "generic": [ - { - "source": "05_genion", - "extension": "gro" - }, + { "source": "{file_base}/minim.mdp", "extension": "mdp" - }, - { - "source": "05_genion", - "extension": "top" - }, - { - "source": "05_genion", - "extension": "itp" } ] } @@ -269,21 +220,10 @@ }, "input": { "generic": [ - { - "source": "07_eminim_mdrun", - "extension": "gro" - }, - { - "source": "05_genion", - "extension": "top" - }, + { "source": "{file_base}/nvt_equil.mdp", "extension": "mdp" - }, - { - "source": "01_pdb2gmx", - "extension": "itp" } ] } @@ -337,26 +277,12 @@ }, "input": { "generic": [ - { - "source": "09_nvt_mdrun", - "extension": "gro" - }, - { - "source": "05_genion", - "extension": "top" - }, + { "source": "{file_base}/npt_equil.mdp", "extension": "mdp" - }, - { - "source": "01_pdb2gmx", - "extension": "itp" - }, - { - "source": "08_nvt_grompp", - "extension": "ndx" } + ] } }, @@ -412,21 +338,10 @@ }, "input": { "generic": [ - { - "source": "11_npt_mdrun", - "extension": "gro" - }, - { - "source": "05_genion", - "extension": "top" - }, + { "source": "{file_base}/md.mdp", "extension": "mdp" - }, - { - "source": "01_pdb2gmx", - 
"extension": "itp" } ] } @@ -571,19 +486,8 @@ { "source": "13_prod_mdrun", "extension": "tpr" - }, - { - "source": "13_prod_mdrun", - "extension": "gro" - }, - { - "source": "12_prod_md_grompp", - "extension": "top" - }, - { - "source": "12_prod_md_grompp", - "extension": "itp" } + ] }, "writeout": [ diff --git a/examples/workflow/gromacs/gromacs_fpocket.json b/examples/workflow/gromacs/gromacs_fpocket.json index 2809873..583b520 100644 --- a/examples/workflow/gromacs/gromacs_fpocket.json +++ b/examples/workflow/gromacs/gromacs_fpocket.json @@ -97,18 +97,6 @@ } }, "additional": {} - }, - "input": { - "generic": [ - { - "source": "02_editconf", - "extension": "gro" - }, - { - "source": "01_pdb2gmx", - "extension": "top" - } - ] } }, { @@ -128,21 +116,10 @@ }, "input": { "generic": [ - { - "source": "03_solvate", - "extension": "gro" - }, + { "source": "{file_base}/ions.mdp", "extension": "mdp" - }, - { - "source": "03_solvate", - "extension": "top" - }, - { - "source": "01_pdb2gmx", - "extension": "itp" } ] } @@ -172,10 +149,6 @@ { "source": "04_grompp", "extension": "tpr" - }, - { - "source": "04_grompp", - "extension": "top" } ] } @@ -197,21 +170,10 @@ }, "input": { "generic": [ - { - "source": "05_genion", - "extension": "gro" - }, - { - "source": "{file_base}/minim.mdp", - "extension": "mdp" - }, + { "source": "05_genion", "extension": "top" - }, - { - "source": "01_pdb2gmx", - "extension": "itp" } ] } @@ -259,21 +221,10 @@ }, "input": { "generic": [ - { - "source": "07_eminim_mdrun", - "extension": "gro" - }, - { - "source": "05_genion", - "extension": "top" - }, + { "source": "{file_base}/nvt_equil.mdp", "extension": "mdp" - }, - { - "source": "01_pdb2gmx", - "extension": "itp" } ] } @@ -320,25 +271,9 @@ }, "input": { "generic": [ - { - "source": "09_nvt_mdrun", - "extension": "gro" - }, - { - "source": "05_genion", - "extension": "top" - }, { "source": "{file_base}/npt_equil.mdp", "extension": "mdp" - }, - { - "source": "01_pdb2gmx", - "extension": "itp" - }, 
- { - "source": "08_nvt_grompp", - "extension": "ndx" } ] } @@ -388,25 +323,9 @@ }, "input": { "generic": [ - { - "source": "11_npt_mdrun", - "extension": "gro" - }, - { - "source": "05_genion", - "extension": "top" - }, { "source": "{file_base}/md.mdp", "extension": "mdp" - }, - { - "source": "01_pdb2gmx", - "extension": "itp" - }, - { - "source": "08_nvt_grompp", - "extension": "ndx" } ] } @@ -570,19 +489,8 @@ "additional": { "pipe_input": "Protein" } - }, - "input": { - "generic": [ - { - "source": "12_prod_md_grompp", - "extension": "gro" - }, - { - "source": "12_prod_md_grompp", - "extension": "ndx" - } - ] } + }, { "step_id": "17_MDpocket", diff --git a/examples/workflow/gromacs/gromacs_md.json b/examples/workflow/gromacs/gromacs_md.json index b59c912..9b90cf2 100644 --- a/examples/workflow/gromacs/gromacs_md.json +++ b/examples/workflow/gromacs/gromacs_md.json @@ -63,15 +63,8 @@ }, "additional": { } - }, - "input": { - "generic": [ - { - "source": "01_pdb2gmx", - "extension": "gro" - } - - ]} + } + },{ "step_id": "03_solvate", @@ -88,18 +81,8 @@ }, "additional": { } - }, - "input": { - "generic": [ - { - "source": "02_editconf", - "extension": "gro" - },{ - "source": "01_pdb2gmx", - "extension": "top" - } - - ]} + } + }, { @@ -121,17 +104,8 @@ "input": { "generic": [ { - "source": "03_solvate", - "extension": "gro" - },{ "source": "{file_base}/ions.mdp", "extension": "mdp" - },{ - "source": "03_solvate", - "extension": "top" - },{ - "source": "01_pdb2gmx", - "extension": "itp" } ]} @@ -158,12 +132,6 @@ { "source": "04_grompp", "extension": "tpr" - },{ - "source": "04_grompp", - "extension": "top" - },{ - "source": "04_grompp", - "extension": "itp" } ]} @@ -186,18 +154,9 @@ }, "input": { "generic": [ - { - "source": "05_genion", - "extension": "gro" - },{ + { "source": "{file_base}/minim.mdp", "extension": "mdp" - },{ - "source": "05_genion", - "extension": "top" - },{ - "source": "05_genion", - "extension": "itp" } ]} @@ -245,17 +204,8 @@ "input": { 
"generic": [ { - "source": "07_eminim_mdrun", - "extension": "gro" - },{ - "source": "05_genion", - "extension": "top" - },{ "source": "{file_base}/nvt_equil.mdp", "extension": "mdp" - },{ - "source": "01_pdb2gmx", - "extension": "itp" } ]} },{ @@ -300,18 +250,9 @@ }, "input": { "generic": [ - { - "source": "09_nvt_mdrun", - "extension": "gro" - },{ - "source": "05_genion", - "extension": "top" - },{ + { "source": "{file_base}/npt_equil.mdp", "extension": "mdp" - },{ - "source": "01_pdb2gmx", - "extension": "itp" } ]} },{ @@ -362,17 +303,8 @@ "input": { "generic": [ { - "source": "11_npt_mdrun", - "extension": "gro" - },{ - "source": "05_genion", - "extension": "top" - },{ "source": "{file_base}/md.mdp", "extension": "mdp" - },{ - "source": "01_pdb2gmx", - "extension": "itp" } ] } diff --git a/examples/workflow/gromacs/gromacs_mmgbsa.json b/examples/workflow/gromacs/gromacs_mmgbsa.json index 4ea3256..d149798 100644 --- a/examples/workflow/gromacs/gromacs_mmgbsa.json +++ b/examples/workflow/gromacs/gromacs_mmgbsa.json @@ -71,15 +71,8 @@ } }, "additional": {} - }, - "input": { - "generic": [ - { - "source": "01_pdb2gmx", - "extension": "gro" - } - ] } + }, { "step_id": "03_solvate", @@ -95,19 +88,8 @@ } }, "additional": {} - }, - "input": { - "generic": [ - { - "source": "02_editconf", - "extension": "gro" - }, - { - "source": "01_pdb2gmx", - "extension": "top" - } - ] } + }, { "step_id": "04_grompp", @@ -126,21 +108,10 @@ }, "input": { "generic": [ - { - "source": "03_solvate", - "extension": "gro" - }, + { "source": "{file_base}/ions.mdp", "extension": "mdp" - }, - { - "source": "03_solvate", - "extension": "top" - }, - { - "source": "01_pdb2gmx", - "extension": "itp" } ] } @@ -170,14 +141,6 @@ { "source": "04_grompp", "extension": "tpr" - }, - { - "source": "04_grompp", - "extension": "top" - }, - { - "source": "04_grompp", - "extension": "itp" } ] } @@ -199,21 +162,10 @@ }, "input": { "generic": [ - { - "source": "05_genion", - "extension": "gro" - }, + { 
"source": "{file_base}/minim.mdp", "extension": "mdp" - }, - { - "source": "05_genion", - "extension": "top" - }, - { - "source": "05_genion", - "extension": "itp" } ] } @@ -260,21 +212,10 @@ }, "input": { "generic": [ - { - "source": "07_eminim_mdrun", - "extension": "gro" - }, - { - "source": "05_genion", - "extension": "top" - }, + { "source": "{file_base}/nvt_equil.mdp", "extension": "mdp" - }, - { - "source": "01_pdb2gmx", - "extension": "itp" } ] } @@ -321,25 +262,10 @@ }, "input": { "generic": [ - { - "source": "09_nvt_mdrun", - "extension": "gro" - }, - { - "source": "05_genion", - "extension": "top" - }, + { "source": "{file_base}/npt_equil.mdp", "extension": "mdp" - }, - { - "source": "01_pdb2gmx", - "extension": "itp" - }, - { - "source": "08_nvt_grompp", - "extension": "ndx" } ] } @@ -389,21 +315,10 @@ }, "input": { "generic": [ - { - "source": "11_npt_mdrun", - "extension": "gro" - }, - { - "source": "05_genion", - "extension": "top" - }, + { "source": "{file_base}/md.mdp", "extension": "mdp" - }, - { - "source": "01_pdb2gmx", - "extension": "itp" } ] } @@ -541,18 +456,6 @@ { "source": "13_prod_mdrun", "extension": "tpr" - }, - { - "source": "13_prod_mdrun", - "extension": "gro" - }, - { - "source": "12_prod_md_grompp", - "extension": "top" - }, - { - "source": "12_prod_md_grompp", - "extension": "itp" } ] }, diff --git a/examples/workflow/pmx/pmx_rbfe.json b/examples/workflow/pmx/pmx_rbfe.json index 69cc062..796329b 100644 --- a/examples/workflow/pmx/pmx_rbfe.json +++ b/examples/workflow/pmx/pmx_rbfe.json @@ -17,10 +17,6 @@ "key": "GMX_FORCE_UPDATE_DEFAULT_GPU", "value": "true" }, - { - "key": "ACPYPE", - "value": "${ACPYPE}/acpype" - }, { "key": "PMX_PYTHON", "value": "${CONDA}/envs/pmx/bin/python" diff --git a/external_documentation/gold.conf b/external_documentation/gold.conf new file mode 100644 index 0000000..afac0b8 --- /dev/null +++ b/external_documentation/gold.conf @@ -0,0 +1,77 @@ + GOLD CONFIGURATION FILE + + AUTOMATIC SETTINGS +autoscale = 
1 + + POPULATION +popsiz = auto +select_pressure = auto +n_islands = auto +maxops = auto +niche_siz = auto + + GENETIC OPERATORS +pt_crosswt = auto +allele_mutatewt = auto +migratewt = auto + + FLOOD FILL +radius = 10 +origin = 0 0 0 +do_cavity = 1 +floodfill_atom_no = 0 +cavity_file = /path/Desktop/Projects/StructureBased/IcolosData/molecules/1UYD/PU8_reference_ligand.mol2 +floodfill_center = cavity_from_ligand 7 atoms + + DATA FILES +ligand_data_file /path/Desktop/Projects/StructureBased/IcolosData/molecules/aspirin.sdf 10 +ligand_data_file /path/Desktop/Projects/StructureBased/IcolosData/molecules/paracetamol.sdf 10 +param_file = DEFAULT +set_ligand_atom_types = 1 +set_protein_atom_types = 0 +directory = . +tordist_file = DEFAULT +make_subdirs = 0 +save_lone_pairs = 1 +fit_points_file = fit_pts.mol2 +read_fitpts = 0 + + FLAGS +internal_ligand_h_bonds = 0 +flip_free_corners = 0 +match_ring_templates = 0 +flip_amide_bonds = 0 +flip_planar_n = 1 flip_ring_NRR flip_ring_NHR +flip_pyramidal_n = 0 +rotate_carboxylic_oh = flip +use_tordist = 1 +postprocess_bonds = 1 +rotatable_bond_override_file = DEFAULT +solvate_all = 1 + + TERMINATION +early_termination = 1 +n_top_solutions = 3 +rms_tolerance = 1.5 + + CONSTRAINTS +force_constraints = 0 + + COVALENT BONDING +covalent = 0 + + SAVE OPTIONS +save_score_in_file = 1 +save_protein_torsions = 1 + + FITNESS FUNCTION SETTINGS +initial_virtual_pt_match_max = 3 +relative_ligand_energy = 1 +gold_fitfunc_path = goldscore +start_vdw_linear_cutoff = 6 +score_param_file = DEFAULT + + PROTEIN DATA +protein_datafile = /path/Desktop/1UYD_protein.mol2 + + diff --git a/setup.py b/setup.py index 8c3257f..891031a 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name="icolos", maintainer="Christian Margreitter, Harry Moore", - version="1.6.0", + version="1.7.0", url="https://github.com/MolecularAI/Icolos", packages=find_namespace_packages(where="src"), package_dir={"": "src"}, diff --git 
a/src/icolos/core/composite_agents/workflow.py b/src/icolos/core/composite_agents/workflow.py index a1ca079..db9a619 100644 --- a/src/icolos/core/composite_agents/workflow.py +++ b/src/icolos/core/composite_agents/workflow.py @@ -1,10 +1,9 @@ from typing import Dict, List from pydantic import BaseModel, PrivateAttr +from icolos.core.containers.gromacs_topol import GromacsTopol from icolos.core.containers.perturbation_map import PerturbationMap from icolos.core.flow_control.flow_control import FlowControlBase -from icolos.core.step_dispatch.dispatcher import StepDispatcher -from icolos.core.steps_utils import initialize_step_from_dict from icolos.core.workflow_steps.step import StepBase from icolos.core.composite_agents.base_agent import BaseAgent, AgentHeaderParameters from icolos.utils.enums.step_enums import StepBaseEnum @@ -26,6 +25,7 @@ class WorkflowHeaderParameters(AgentHeaderParameters, BaseModel): class WorkflowData(BaseModel): work_dir: str = None perturbation_map: PerturbationMap = None + gmx_topol: GromacsTopol = GromacsTopol() class WorkFlow(BaseAgent, BaseModel): @@ -46,6 +46,10 @@ def __init__(self, **data): self._initialized_steps = [] def initialize(self): + # this feels hacky but it fixes a circular import and allows a step in the workflow + # to create other workflows + from icolos.core.steps_utils import initialize_step_from_dict + super().initialize() self._initialized_steps = [] for step_conf in self.steps: @@ -56,18 +60,8 @@ def initialize(self): step.set_workflow_object(self) self._initialized_steps.append(step) elif isinstance(step, FlowControlBase): - # flow control has returned a list of steps, or a single JobControl step - if isinstance(step.initialized_steps, list): - for st in step.initialized_steps: - st.set_workflow_object(self) - self._initialized_steps.append(st) - elif isinstance(step.initialized_steps, StepDispatcher): - # parallelize was set, returns a JobControl wrapper - # step.initialized_steps.initialized_steps. 
- # set_workflow_object(self) - for st in step.initialized_steps.initialized_steps: - st.set_workflow_object(self) - self._initialized_steps.append(step.initialized_steps) + self._initialized_steps.append(step.dispatcher) + step.dispatcher.set_workflow_object(self) self._logger.log( f"Initialized {len(self._initialized_steps)} steps in workflow {self.header.id}.", _LE.DEBUG, @@ -108,11 +102,8 @@ def find_step_by_step_id(self, step_id: str): for step in self._initialized_steps: if step.step_id == step_id: return step - elif step.type == _SBE.STEP_DISPATCHER: - # the steps themselves are buried in the _initialized_steps attribute of JobControl, - for st in step.initialized_steps: - if st.step_id == step_id: - return st + elif step.step_id == _SBE.STEP_DISPATCHER: + pass raise IndexError(f"Could not find step with step_id {step_id} in workflow.") diff --git a/src/icolos/core/containers/compound.py b/src/icolos/core/containers/compound.py index b36fc2d..0f466c8 100644 --- a/src/icolos/core/containers/compound.py +++ b/src/icolos/core/containers/compound.py @@ -349,11 +349,14 @@ def __repr__(self): if self.get_compound_object() is None else self.get_compound_object().get_compound_number() ) - return "" % ( - self.get_enumeration_id(), - self.get_smile(), - parent_compound_id, - len(self._conformers), + return ( + "" + % ( + self.get_enumeration_id(), + self.get_smile(), + parent_compound_id, + len(self._conformers), + ) ) def __str__(self): diff --git a/src/icolos/core/containers/gromacs_topol.py b/src/icolos/core/containers/gromacs_topol.py new file mode 100644 index 0000000..40c8d91 --- /dev/null +++ b/src/icolos/core/containers/gromacs_topol.py @@ -0,0 +1,288 @@ +import os +from turtle import st +from typing import AnyStr, Dict, List +from pydantic import BaseModel +from icolos.utils.enums.program_parameters import GromacsEnum + +from icolos.utils.enums.step_enums import StepGromacsEnum + +# acpype and pdb2gmx both do a job of handling posre creation etc + +# this 
should be a singular object tacked on to the workflow that gets updated as the workflow progresses. + +_SGE = StepGromacsEnum() +_GE = GromacsEnum() + + +class AtomType(BaseModel): + number: int + a_type: str + resi: int + res: str + atom: str + cgnr: int + charge: float + mass: float + + +class GromacsTopol(BaseModel): + top_lines: List = [] + itps: Dict = {} + posre: Dict = {} + atomtypes: List = [] + chains: List = None + forcefield: str = "amber03" + water: str = "tip3p" + system: List = [] + molecules: Dict = {} + # TODO: we could make this a generic data object? + structure: List = [] + + def __init__(self, **data) -> None: + super().__init__(**data) + + def parse(self, file): + """ + Populate the fields from parsing a topol file (normally from gmx pdb2gmx) + If a moleculetype has been defined in a single topology, this is separated into its own itp file + """ + # get the system name + work_dir = os.path.dirname(file) + with open(file, "r") as f: + lines = f.readlines() + + # first check for included atomtypes to be extracted to their own itp files + start_idx = None + stop_idx = None + for line in lines: + if line.startswith(_GE.MOLECULETYPES): + start_idx = lines.index(line) + # go all the way to IFDEF POSRE + elif line.startswith("#ifdef POSRES") and start_idx is not None: + stop_idx = lines.index(line) + 3 + break + if all([x is not None for x in (start_idx, stop_idx)]): + itp_extract = lines[start_idx:stop_idx] + itp_key = itp_extract[2].split()[0] + ".itp" + self.itps[itp_key] = itp_extract + lines = lines[:start_idx] + lines[stop_idx:] + + # extract itp files (not including forcefield stuff) + for line in lines: + if ( + line.startswith("#include") + and ".itp" in line + and all([item not in line for item in (".ff", "posre")]) + ): + itp_file = line.split()[-1].replace('"', "") + with open(os.path.join(work_dir, itp_file), "r") as f: + itp_lines = f.readlines() + self.itps[itp_file] = itp_lines + # extract water model + for line in lines: + if 
line.startswith("#include") and ".ff/tip" in line: + self.water = line.split("/")[-1].split(".")[0].replace('"', "") + self.forcefield = line.split()[-1].split(".")[0].replace('"', "") + start = lines.index([l for l in lines if l.startswith(_GE.SYSTEM)][0]) + stop = lines.index([l for l in lines[start:] if l.startswith(_GE.MOLECULES)][0]) + for line in lines[start + 1 : stop]: + if not line.startswith(";") and line.strip(): + self.system.append(line.strip()) + # excract molecules and counts + for line in lines[stop + 1 :]: + if not line.startswith(";") and line.strip(): + parts = line.split() + self.molecules[parts[0].strip()] = parts[1].strip() + + def write_topol(self, base_dir: str, file: str = "topol.top"): + """ + Write a gromacs topology file, including its dependent itp files, to a dir + """ + if not self.top_lines: + self.top_lines = self.generate_base_topol_file() + with open(os.path.join(base_dir, file), "w") as f: + for l in self.top_lines: + f.write(l) + for itp, lines in self.itps.items(): + if os.path.isfile(os.path.join(base_dir, itp)): + os.remove(os.path.join(base_dir, itp)) + with open(os.path.join(base_dir, itp), "w") as f: + for l in lines: + f.write(l) + for itp, lines in self.posre.items(): + if os.path.isfile(os.path.join(base_dir, itp)): + os.remove(os.path.join(base_dir, itp)) + with open(os.path.join(base_dir, itp), "w") as f: + for l in lines: + f.write(l) + + def generate_base_topol_file(self) -> List[str]: + """ + Generates the main topology file + """ + top_lines = [] + top_lines.append(f'#include "{self.forcefield}.ff/forcefield.itp"\n') + # find any new atomtypes in the parametrised components, slot these in first + new_atomtypes = self.collect_atomtypes() + top_lines += new_atomtypes + + # now include the itp files + for file in self.itps.keys(): + top_lines.append(f'#include "{file}"\n') + if "Protein" not in file: + # these are handled independently in the itp files + top_lines.append(f'#ifdef POSRES\n#include 
"posre_{file}"\n#endif\n') + + # add water model + top_lines.append(f'#include "{self.forcefield}.ff/{self.water}.itp"\n') + top_lines.append(_SGE.WATER_POSRE) + + # add ions itp + top_lines.append(f'#include "{self.forcefield}.ff/ions.itp"\n') + + top_lines.extend(self._construct_block(_GE.SYSTEM, self.system)) + top_lines.extend(self._construct_block(_GE.MOLECULES, self.molecules)) + return top_lines + + def add_itp(self, path, files: List[str], gen_posre: bool = True) -> None: + for file in files: + with open(os.path.join(path, file), "r") as f: + lines = f.readlines() + self.itps[file] = lines + if gen_posre: + # also generate a posre file + self.generate_posre(path, file) + + def add_molecule(self, name: str, num: int = 1): + self.molecules[name] = num + + def add_posre(self, path, files: List[str]) -> None: + for file in files: + with open(os.path.join(path, file), "r") as f: + lines = f.readlines() + + self.posre[file] = lines + + def generate_posre(self, path: str, itp_file: str, force: int = 1000): + stub = itp_file.split(".")[0] + with open(os.path.join(path, itp_file), "r") as f: + lines = f.readlines() + start_idx = None + stop_idx = None + for line in lines: + if line.startswith(_GE.ATOMS_DIRECTIVE): + start_idx = lines.index(line) + 1 + elif line.startswith(_GE.BONDS) and start_idx is not None: + stop_idx = lines.index(line) + break + lines = [ + l for l in lines[start_idx:stop_idx] if not l.startswith(";") and l.strip() + ] + args = ["number", "a_type", "resi", "res", "atom", "cgnr", "charge", "mass"] + atoms = [] + for line in lines: + args_dict = {} + for arg, val in zip(args, line.split(";")[0].split()): + args_dict[arg] = val + atoms.append(AtomType(**args_dict)) + + posre = "posre_" + stub + ".itp" + out_path = os.path.join(path, posre) + header = "\n[ position_restraints ]\n; atom type fx fy fz\n" + written_lines = [header] + with open(out_path, "w") as f: + f.write(header) + for atom in atoms: + if not atom.a_type.upper().startswith("H"): + 
line = ( + f"{atom.number:>6d} 1 {force:>5d} {force:>5d} {force:>5d}\n" + ) + f.write(line) + written_lines.append(line) + self.posre[posre] = written_lines + + def _construct_block(self, header, items) -> List: + if not header.endswith("\n"): + header += "\n" + block = [header] + if isinstance(items, dict): + for key, value in items.items(): + block.append(f"{key} {value}\n") + else: + for i in items: + block.append(i + "\n") + return block + + def collect_atomtypes(self) -> List: + """ + Iterate over the itp files, strip any newly defined atomtypes, append to their own atomtypes.itp file, include this just after the forcefield include + """ + atomtype_lines = [] + # read the topol file, identify all the itp files it is #including + for itp, file_lines in self.itps.items(): + selection, remaining = self._extract_atomtype(file_lines) + atomtype_lines.extend(selection) + self.itps[itp] = remaining + if atomtype_lines: + atomtype_lines = self._remove_duplicate_atomtypes(atomtype_lines) + return atomtype_lines + + def _extract_atomtype(self, lines: List) -> List[AnyStr]: + """ + Pull the atomtype lines out of the topol file and return them as a list, write the sanitised itp file to directory + """ + + start_index = None + stop_index = None + selection = [] + remaining = [] + for idx, line in enumerate(lines): + if _GE.ATOMTYPES in line: + start_index = idx + if _GE.MOLECULETYPES in line: + stop_index = idx + if start_index is not None: + selection = lines[start_index:stop_index] + remaining = lines[:start_index] + + remaining.extend(lines[stop_index:]) + return selection, remaining + + def _remove_duplicate_atomtypes(self, atomtypes: List): + output = [atomtypes[0]] + for line in atomtypes: + if line not in output: + output.append(line) + return output + + def set_topol(self, path: str, file: str = _SGE.STD_TOPOL): + """ + When solvate or genion produce a modified topol, read this into the + """ + with open(os.path.join(path, file), "r") as f: + self.top_lines = 
f.readlines() + + def set_structure(self, path: str, file: str = _SGE.STD_STRUCTURE, sanitize=False): + with open(os.path.join(path, file), "r") as f: + lines = f.readlines() + if sanitize: + lines = [l for l in lines if any([l.startswith(idx) for idx in _GE.ATOMS])] + self.structure = lines + + def write_structure(self, path: str, file: str = _SGE.STD_STRUCTURE): + with open(os.path.join(path, file), "w") as f: + for line in self.structure: + f.write(line) + + def append_structure(self, path: str, file: str, sanitize=False) -> None: + with open(os.path.join(path, file), "r") as f: + lines = f.readlines() + if sanitize: + lines = [l for l in lines if any([l.startswith(idx) for idx in _GE.ATOMS])] + self.structure.extend(lines) + + def __str__(self) -> str: + return f"Gromacs Topology object: System: {self.system} | Molecules: {[m for m in self.molecules]} | FF: {self.forcefield} | itp files: {[f for f in self.itps.keys()]} | posre files {[f for f in self.posre.keys()]}" + + def __repr__(self) -> str: + return self.__str__() diff --git a/src/icolos/core/containers/perturbation_map.py b/src/icolos/core/containers/perturbation_map.py index ad408e9..70898e1 100644 --- a/src/icolos/core/containers/perturbation_map.py +++ b/src/icolos/core/containers/perturbation_map.py @@ -1,9 +1,10 @@ from typing import Dict, List, Optional from IPython.lib.display import IFrame import pandas as pd -from icolos.core.containers.compound import Compound, Conformer, Enumeration +from icolos.core.containers.compound import Compound, Conformer from pyvis.network import Network from icolos.core.containers.generic import GenericData +from icolos.utils.enums.parallelization import ParallelizationEnum from icolos.utils.enums.step_enums import StepFepPlusEnum import os @@ -11,6 +12,7 @@ _SFE = StepFepPlusEnum() +_PE = ParallelizationEnum class Node(BaseModel): @@ -72,6 +74,7 @@ class Config: min_no_atoms: str = None snapCoreRmsd: str = None bidirSnapCoreRmsd: str = None + status: _PE = 
_PE.STATUS_SUCCESS def __init__(self, **data): super().__init__(**data) @@ -86,6 +89,10 @@ def get_edge_id(self) -> str: # construct the edge ID from the node hashes, separated by '_' return f"{self.node_from.get_node_hash()}_{self.node_to.get_node_hash()}" + def _set_status(self, status: str): + assert status in [_PE.STATUS_SUCCESS, _PE.STATUS_FAILED] + self.status = status + class PerturbationMap(BaseModel): """Hold a map construction parsed from a csv (probabably from a parsed schrodinger log @@ -102,6 +109,8 @@ class Config: vmap_output: IFrame = None replicas: int = 3 node_df: pd.DataFrame = None + # prune subsequent edge calculations on error + strict_execution: str = True def __init__(self, **data) -> None: super().__init__(**data) @@ -256,8 +265,11 @@ def _get_node_by_hash_id(self, hash_id: str) -> Node: if node.node_hash == hash_id: return node - def get_edges(self) -> List[Edge]: - return self.edges + def get_edges(self, alive_only=True) -> List[Edge]: + if alive_only: + return [e for e in self.edges if e.status == _PE.STATUS_SUCCESS] + else: + return self.edges def get_nodes(self) -> List[Node]: return self.nodes @@ -288,6 +300,26 @@ def visualise_perturbation_map(self, write_out_path: str) -> None: def get_protein(self) -> GenericData: return self.protein + def get_edge_by_id(self, idx: str) -> Optional[Edge]: + # handle case where the task is actually a path to a batch script + # print("idx is", idx) + # if not isinstance(idx, Edge): # + # parts = idx.split("/") + # idx = [ + # part for part in parts if any([part in edge for edge in self.edges]) + # ][0] + # print(idx) + # # for part in parts: + # # for e in self.edges: + # # if part == e: + # # idx = part + match = [e for e in self.edges if e.get_edge_id() == idx] + print("matches", match) + if not match: + return + else: + return match[0] + def __repr__(self) -> str: return f"Icolos Perturbation Map object containing {len(self.edges)} edges and {len(self.nodes)} nodes" diff --git 
a/src/icolos/core/flow_control/flow_control.py b/src/icolos/core/flow_control/flow_control.py index 6a7e3da..3f12c76 100644 --- a/src/icolos/core/flow_control/flow_control.py +++ b/src/icolos/core/flow_control/flow_control.py @@ -9,11 +9,6 @@ StepWriteoutParameters, StepExecutionParameters, ) -from icolos.utils.enums.step_enums import StepBaseEnum -from icolos.utils.enums.step_initialization_enum import StepInitializationEnum -from icolos.utils.general.convenience_functions import nested_get - -_SIE = StepInitializationEnum() class BaseStepConfig(BaseModel): @@ -30,7 +25,7 @@ class BaseStepConfig(BaseModel): execution: StepExecutionParameters = StepExecutionParameters() settings: StepSettingsParameters = StepSettingsParameters() - def _as_dict(self): + def as_dict(self): return { "step_id": self.step_id, "type": self.type, @@ -52,16 +47,3 @@ class FlowControlBase(BaseModel): def __init__(self, **data) -> None: super().__init__(**data) self._logger = StepLogger() - - def _initialize_step_from_dict(self, step_conf: dict): - # Require a separate initialisation method to avoid circular import - _STE = StepBaseEnum - - step_type = nested_get(step_conf, _STE.STEP_TYPE, default=None) - step_type = None if step_type is None else step_type.upper() - if step_type in _SIE.STEP_INIT_DICT.keys(): - return _SIE.STEP_INIT_DICT[step_type](**step_conf) - else: - raise ValueError( - f"Backend for step {nested_get(step_conf, _STE.STEPID, '')} unknown." 
- ) diff --git a/src/icolos/core/flow_control/iterator.py b/src/icolos/core/flow_control/iterator.py index c628ce7..f41ab98 100644 --- a/src/icolos/core/flow_control/iterator.py +++ b/src/icolos/core/flow_control/iterator.py @@ -1,19 +1,20 @@ -from cProfile import run from typing import Dict, List, Union from pydantic import BaseModel +from icolos.core.composite_agents.workflow import WorkFlow from icolos.core.flow_control.flow_control import BaseStepConfig, FlowControlBase from copy import deepcopy from icolos.core.workflow_steps.step import _LE from icolos.core.step_dispatch.dispatcher import StepDispatcher +from icolos.utils.enums.composite_agents_enums import WorkflowEnum from icolos.utils.enums.step_enums import StepBaseEnum from icolos.core.workflow_steps.step import StepBase from icolos.utils.enums.step_enums import IteratorEnum import os -from glob import glob _IE = IteratorEnum _SBE = StepBaseEnum +_WE = WorkflowEnum() class IterSettingsParameters(BaseModel): @@ -51,14 +52,12 @@ class StepIterator(FlowControlBase, BaseModel): # holds the dict of iterables for the bits to chang iter_settings: IterSettings = IterSettings() + dispatcher: StepDispatcher = None def __init__(self, **data): super().__init__(**data) - # when init_step_from_dict calls this method, we need to initialise a list of steps, - # controlled by iter_settings.iter_mode - self.initialized_steps = self._initialize_steps() - # either generate a list, if serial execution, or initialize a single JobControl - # step with each config as an initialized step + + self.dispatcher = self._initialize_workflows() def _modify_settings( self, settings: BaseStepConfig, step_config: BaseStepConfig, i: int @@ -85,21 +84,28 @@ def _modify_settings( return base_conf - def _initialize_n_iters(self) -> List: + def _initialize_n_iters(self) -> List[WorkFlow]: """ Initialise n identical copies of the same step config """ - init_steps = [] + workflows = [] for i in range(self.iter_settings.n_iters): - + 
workflow_steps = [] list_step_conf = deepcopy(self.base_config) # hand all steps over to the config updater - formatted_confs = self._update_config(list_step_conf, f"run_{i}") - for step_conf in formatted_confs: - initialized_step = self._initialize_step_from_dict(step_conf._as_dict()) - init_steps.append(initialized_step) - return init_steps + step_confs = self._update_config(list_step_conf, f"run_{i}") + for step in step_confs: + workflow_steps.append(step.as_dict()) + wf_config = { + _WE.HEADER: { + _WE.ID: f"workflow_{i}", + _WE.ENVIRONMENT: {_WE.ENVIRONMENT_EXPORT: []}, + }, + _WE.STEPS: workflow_steps, + } + workflows.append(WorkFlow(**wf_config)) + return workflows def _initialize_settings(self) -> List: """ @@ -126,83 +132,54 @@ def _initialize_settings(self) -> List: init_steps.append(initialized_step) return init_steps - def _initialize_steps(self) -> Union[List, StepBase]: + def _initialize_workflows(self) -> StepDispatcher: """ Handle step init according to config - Returns a list of steps if serial execution (default) - Returns a Step-like JobControl object if parallelisation is specified. + Returns a Step-like JobControl that holds a list of separate workflows, which will be executed according to the parallelization scheme. 
""" - steps = [] + workflows = [] if self.iter_settings.iter_mode == _IE.N_ITERS: - # simplest mode, just n repeats of the same step - steps += self._initialize_n_iters() + # simplest mode, generate n independent workflows with the same set of steps + workflows += self._initialize_n_iters() elif self.iter_settings.iter_mode == _IE.SINGLE: + raise NotImplementedError # for n different settings, iterate through each, returning n steps steps += self._initialize_settings() else: raise NotImplementedError self._logger.log( - f"Iterator has initialized {len(steps)} steps for step {self.base_config[0].step_id}", + f"Initialized {len(workflows)} jobs for step {self.base_config[0].step_id}", _LE.DEBUG, ) - if not self.iter_settings.parallelizer_settings.parallelize: - return steps - else: - - wrapper = StepDispatcher( - step_id="step_dispatcher", - type=_SBE.STEP_DISPATCHER, - initialized_steps=steps, - parallel_execution=self.iter_settings.parallelizer_settings, - ) - return wrapper + dispatcher = StepDispatcher( + step_id="step_dispatcher", + type=_SBE.STEP_DISPATCHER, + workflows=workflows, + parallel_execution=self.iter_settings.parallelizer_settings, + ) + return dispatcher def _update_config( self, step_conf: List[BaseStepConfig], run_id: str ) -> List[BaseStepConfig]: """ Manages modifications to each step in the config: - * step_id is updated with the run_id - * any references to other step_ids (e.g. 
in input) contained in the base config are updated to reflect the change * writeout paths are updated to separate output from each of the runs """ - original_step_ids = [conf.step_id for conf in step_conf] + formatted_confs = [] for conf in step_conf: - # modify the step_id - st_id = conf.step_id - conf.step_id = st_id + "_" + run_id + # modify the writeout paths: add a key_value dir the writeout path for block in conf.writeout: if block.destination.resource is not None: resource = block.destination.resource parts = resource.split("/") new_resource = os.path.join("/".join(parts[:-1]), run_id, parts[-1]) - print(new_resource) block.destination.resource = new_resource - # modify the step_input blocks if they reference a step_id contained in step_conf - # treat compounds - for comp in conf.input.compounds: - if comp.source in original_step_ids: - comp.source += f"_{run_id}" - - for gen in conf.input.generic: - if gen.source in original_step_ids: - gen.source += f"_{run_id}" - - # TODO: this is a bodge for now - # we have an edge case in data manipularion that needs to match compounds those from another step, the source name needs the same treatment - if conf.type.upper() == _SBE.STEP_DATA_MANIPULATION: - if ( - _SBE.INPUT_SOURCE in conf.settings.additional.keys() - and conf.settings.additional[_SBE.INPUT_SOURCE] in original_step_ids - ): - - conf.settings.additional[_SBE.INPUT_SOURCE] += f"_{run_id}" - formatted_confs.append(conf) return formatted_confs diff --git a/src/icolos/core/step_dispatch/dispatcher.py b/src/icolos/core/step_dispatch/dispatcher.py index d237ea0..a891949 100644 --- a/src/icolos/core/step_dispatch/dispatcher.py +++ b/src/icolos/core/step_dispatch/dispatcher.py @@ -1,6 +1,6 @@ from typing import List from pydantic.main import BaseModel - +from icolos.core.composite_agents.workflow import WorkFlow from icolos.core.workflow_steps.step import StepBase from icolos.utils.general.parallelization import Parallelizer, SubtaskContainer from 
icolos.core.workflow_steps.step import _LE @@ -23,7 +23,7 @@ class StepDispatcher(StepBase, BaseModel): Step-type class for disaptching multiple steps in parallel, useful for executing multiple batch jobs simultaneously """ - initialized_steps: List = [] + workflows: List = [] # expect the parallel execution block to be handed over from flow control parallel_execution: IterParallelizer = IterParallelizer() @@ -44,19 +44,14 @@ def execute(self): """ Execute multiple steps in parallel """ - # Spin up multiple processes + # Spin up multiple processes. self.execution.parallelization.cores = self.parallel_execution.cores - # each subtask needs to contain an entire mini workflow to be executed sequentially, - self.execution.parallelization.max_length_sublists = ( - self.parallel_execution.dependent_steps - ) - # if we try steps multiple times, we have steps fail depending on its dependency on a - # previous step - too complicated + # TODO, we can repeat entire workflows if we want, I'm not sure this makes sense though self._subtask_container = SubtaskContainer(max_tries=1) - self._subtask_container.load_data(self.initialized_steps) + self._subtask_container.load_data(self.workflows) - parallelizer = Parallelizer(func=self._run_step) + parallelizer = Parallelizer(func=self.execute_workflow) n = 1 while self._subtask_container.done() is False: @@ -72,9 +67,9 @@ def execute(self): _LE.INFO, ) - steps = self._prepare_batch(next_batch) + jobs = self._prepare_batch(next_batch) - result = parallelizer.execute_parallel(steps=steps) + result = parallelizer.execute_parallel(jobs=jobs) # TODO: sucessful execution of each step is not explicitly checked, # the step is responsible for throwing errors if something has gone wrong @@ -82,10 +77,12 @@ def execute(self): for subtask in task: subtask.set_status_success() - def _run_step(self, steps: List[StepBase]): + def execute_workflow(self, jobs): # submits then monitors the step - for step in steps: # length max_len_sublist - # at this 
point the internal steps don't have their data initialised - step.generate_input() - step.execute() - step.process_write_out() + wf_data = self.get_workflow_object().workflow_data + for idx, job in enumerate(jobs): + # copy existing wf data up to this point int othe new wf object + job.initialize() + job.workflow_data = wf_data + self._logger.log(f"Executing workflow {idx} of {len(jobs)}", _LE.DEBUG) + job.execute() diff --git a/src/icolos/core/step_utils/gold_config.py b/src/icolos/core/step_utils/gold_config.py new file mode 100644 index 0000000..2e01aec --- /dev/null +++ b/src/icolos/core/step_utils/gold_config.py @@ -0,0 +1,98 @@ +from typing import List +from pydantic import BaseModel + + +class ConfigProteinData(BaseModel): + protein_datafile: str + + +class ConfigFitnessFunctionSettings(BaseModel): + initial_virtual_pt_match_max: int = 3 + relative_ligand_energy: int = 1 + gold_fitfunc_path: str = "goldscore" + start_vdw_linear_cutoff: float = 6 + score_param_file: str = "DEFAULT" + + +class ConfigSaveOptions(BaseModel): + save_score_in_file: int = 1 + save_protein_torsions: int = 1 + + +class ConfigCovalentBonding(BaseModel): + # TODO: extend to support covalent docking + covalent: int = 0 + + +class ConfigConstraints(BaseModel): + # TODO: extend to support constrained docking + force_constraints: int = 0 + + +class ConfigTermination(BaseModel): + early_termination: int = 1 + n_top_solutions: int = 3 + rms_tolerance: float = 1.5 + + +class ConfigFlags(BaseModel): + internal_ligand_h_bonds: int = 0 + flip_free_corners: int = 0 + match_ring_templates: int = 0 + flip_amide_bonds: int = 0 + flip_planar_n: str = "1 flip_ring_NRR flip_ring_NHR" + flip_pyramidal_n: str = 0 + rotate_carboxylic_oh: str = "flip" + use_tordist: int = 1 + postprocess_bonds: int = 1 + rotatable_bond_override_file: str = "DEFAULT" + solvate_all: int = 1 + + +class ConfigDataFiles(BaseModel): + ligand_data_file: List[str] = [] + param_file: str = "DEFAULT" + set_ligand_atom_types: int = 1 
+ set_protein_atom_types: int = 0 + directory: str = "." + tordist_file: str = "DEFAULT" + make_subdirs: int = 0 + save_lone_pairs: int = 1 + fit_points_file: str = "fit_pts.mol2" + read_fitpts: str = 0 + + +class ConfigFloodFill(BaseModel): + radius: float = 10 + origin: str = "0 0 0" + do_cavity: int = 1 + floodfill_atom_no: int = 0 + cavity_file: str + floodfill_center: str = "cavity_from_ligand 7 atoms" + + +class ConfigGeneticOperators(BaseModel): + pt_crosswt: str = "auto" + allele_mutatewt: str = "auto" + migratewt: str = "auto" + + +class ConfigPopulation(BaseModel): + # Note that pydantic accepts also numbers when strings are specified (unless "strict" mode is utilized) + popsiz: str = "auto" + select_pressure: str = "auto" + n_islands: str = "auto" + maxops: str = "auto" + niche_siz: str = "auto" + + +class ConfigAutomaticSettings(BaseModel): + autoscale: float = ( + 1.0 # between 0 (off) and 5.0 (the larger the slower but more sampling) + ) + autoscale_nops_max: int = ( + 100000 # maximum value for autoscale operations (0 means off) + ) + autoscale_nops_min: int = ( + 100 # minimum value for autoscale operations (0 means off) + ) diff --git a/src/icolos/core/workflow_steps/calculation/panther.py b/src/icolos/core/workflow_steps/calculation/panther.py index 64b8535..be60d86 100644 --- a/src/icolos/core/workflow_steps/calculation/panther.py +++ b/src/icolos/core/workflow_steps/calculation/panther.py @@ -105,9 +105,9 @@ def _modify_panther_config_file( self, config_file: str, update_dictionary: dict ) -> str: for key, value in update_dictionary.items(): - pattern = fr"({key}.*:: ).*" + pattern = rf"({key}.*:: ).*" pattern = re.compile(pattern) - config_file = re.sub(pattern, fr"\1 {value}", config_file) + config_file = re.sub(pattern, rf"\1 {value}", config_file) return config_file def _execute_backend(self, tmp_dir): diff --git a/src/icolos/scripts/tests/junk/docked_conformers.sdf b/src/icolos/core/workflow_steps/ccdc/__init__.py similarity index 100% 
rename from src/icolos/scripts/tests/junk/docked_conformers.sdf rename to src/icolos/core/workflow_steps/ccdc/__init__.py diff --git a/src/icolos/core/workflow_steps/ccdc/docking.py b/src/icolos/core/workflow_steps/ccdc/docking.py new file mode 100644 index 0000000..393813e --- /dev/null +++ b/src/icolos/core/workflow_steps/ccdc/docking.py @@ -0,0 +1,402 @@ +import os +import glob +import tempfile +import time +from typing import List, Tuple + +from pydantic import BaseModel, Field +from rdkit import Chem +from copy import deepcopy + +from icolos.utils.enums.step_enums import StepBaseEnum, StepGoldEnum +from icolos.utils.execute_external.gold import GoldExecutor +from icolos.utils.general.files_paths import gen_tmp_file +from icolos.core.containers.compound import Conformer +from icolos.utils.enums.program_parameters import GoldOutputEnum, GoldEnum +from icolos.core.workflow_steps.step import _LE, StepBase +from icolos.utils.general.parallelization import Subtask, SubtaskContainer, Parallelizer +from icolos.core.step_utils.gold_config import ( + ConfigAutomaticSettings, + ConfigPopulation, + ConfigGeneticOperators, + ConfigFloodFill, + ConfigDataFiles, + ConfigFlags, + ConfigProteinData, + ConfigFitnessFunctionSettings, + ConfigSaveOptions, + ConfigCovalentBonding, + ConfigConstraints, + ConfigTermination, +) + +_SBE = StepBaseEnum +_CGE = GoldEnum() +_SGE = StepGoldEnum() +_GOE = GoldOutputEnum() + + +class GoldConfiguration(BaseModel): + automatic_settings: ConfigAutomaticSettings = Field( + alias="AUTOMATIC SETTINGS", default=ConfigAutomaticSettings() + ) + population: ConfigPopulation = Field(alias="POPULATION", default=ConfigPopulation()) + genetic_operators: ConfigGeneticOperators = Field( + alias="GENETIC OPERATORS", default=ConfigGeneticOperators() + ) + flood_fill: ConfigFloodFill = Field(alias="FLOOD FILL", default=None) + data_files: ConfigDataFiles = Field(alias="DATA FILES", default=ConfigDataFiles()) + flags: ConfigFlags = Field(alias="FLAGS", 
default=ConfigFlags()) + termination: ConfigTermination = Field( + alias="TERMINATION", default=ConfigTermination() + ) + constraints: ConfigConstraints = Field( + alias="CONSTRAINTS", default=ConfigConstraints() + ) + covalent_bonding: ConfigCovalentBonding = Field( + alias="COVALENT BONDING", default=ConfigCovalentBonding() + ) + save_options: ConfigSaveOptions = Field( + alias="SAVE OPTIONS", default=ConfigSaveOptions() + ) + fitness_function_settings: ConfigFitnessFunctionSettings = Field( + alias="FITNESS FUNCTION SETTINGS", default=ConfigFitnessFunctionSettings() + ) + protein_data: ConfigProteinData = Field( + alias="PROTEIN DATA", default=None + ) + + +class GoldAdditional(BaseModel): + configuration: GoldConfiguration = GoldConfiguration() + gold_config_file: str = None + docking_score_tag: str = "Gold.Goldscore.Fitness" + grid_ids: List[str] = ["grid0"] + + +class StepGold(StepBase, BaseModel): + + gold_additional: GoldAdditional = None + _gold_executor: GoldExecutor = None + + def __init__(self, **data): + super().__init__(**data) + + # initialize the executor and test availability + self._initialize_backend(executor=GoldExecutor) + self._check_backend_availability() + + # set Gold specific settings + self.gold_additional = GoldAdditional(**self.settings.additional) + + def _set_docking_score(self, conformer: Chem.Mol) -> bool: + tag = self.gold_additional.docking_score_tag + try: + docking_score = conformer.GetProp(tag) + except KeyError: + self._logger.log(f"Could not find tag \"{tag}\" in conformer, use \"docking_score_tag\" to adjust.", + _LE.DEBUG) + return False + conformer.SetProp(_SBE.ANNOTATION_TAG_DOCKING_SCORE, str(docking_score)) + return True + + def _generate_temporary_input_output_files( + self, batch: List[List[Subtask]] + ) -> Tuple[List[str], List[List[str]], List[List[str]]]: + tmp_output_dirs = [] + tmp_input_sdf_paths = [] + tmp_output_sdf_paths = [] + + for next_subtask_list in batch: + # generate temporary input files and output 
directory + cur_tmp_output_dir = tempfile.mkdtemp() + + # write-out the temporary input files: one molecule per file each + one_written = False + list_input_files = [] + list_output_files = [] + for subtask in next_subtask_list: + enumeration = subtask.data + mol = deepcopy(enumeration.get_molecule()) + if mol is not None: + _, cur_tmp_sdf = gen_tmp_file(suffix=".sdf", dir=cur_tmp_output_dir) + writer = Chem.SDWriter(cur_tmp_sdf) + mol.SetProp("_Name", enumeration.get_index_string()) + writer.write(mol) + writer.close() + list_input_files.append(cur_tmp_sdf) + + # output files for gold look something like: ranked_tmp1cwtavoh_m1_2.sdf + # where: (1) "ranked_" is a constant, (2) followed by the input file name, + # (3) followed by the molecule # (here always "m1" as only 1 compound/file at the moment), + # (4) followed by the pose # and finally (5) the ".sdf" ending + # use '*' for getting all files + basename_filepart = os.path.basename(cur_tmp_sdf).rstrip(".sdf") + list_output_files.append(os.path.join(cur_tmp_output_dir, "".join(["ranked_", + basename_filepart, + "_m1_*.sdf"]))) + one_written = True + if one_written is False: + # no compound left from this batch: remove the temporary folder and skip this one + self._remove_temporary(cur_tmp_output_dir) + continue + + # since all went well, add input and output paths here + tmp_input_sdf_paths.append(list_input_files) + tmp_output_sdf_paths.append(list_output_files) + tmp_output_dirs.append(cur_tmp_output_dir) + return ( + tmp_output_dirs, + tmp_input_sdf_paths, + tmp_output_sdf_paths + ) + + def _execute_gold(self): + # get number of sublists in batch and initialize Parallelizer + gold_parallelizer = Parallelizer(func=self._run_subjob) + + # continue until everything is successfully done or number of retries have been exceeded + while self._subtask_container.done() is False: + next_batch = self._get_sublists(get_first_n_lists=self._get_number_cores()) + + # generate paths and initialize molecules (so that if they 
fail, this can be covered) + ( + tmp_output_dirs, + tmp_input_sdf_paths, + tmp_output_sdf_paths + ) = self._generate_temporary_input_output_files(next_batch) + + # execute the current batch in parallel; hand over lists of parameters (will be handled by Parallelizer) + # also increment the tries and set the status to "failed" (don't do that inside subprocess, as data is + # copied, not shared!) + _ = [sub.increment_tries() for element in next_batch for sub in element] + _ = [sub.set_status_failed() for element in next_batch for sub in element] + gold_parallelizer.execute_parallel( + output_dir=tmp_output_dirs, input_paths=tmp_input_sdf_paths + ) + + # parse the output of that particular batch and remove temporary files + self._parse_gold_output( + tmp_output_paths=tmp_output_sdf_paths, + batch=next_batch + ) + + # clean-up + self._remove_temporary(tmp_output_dirs) + + # print the progress for this execution + self._log_execution_progress() + + def _parse_gold_output( + self, + tmp_output_paths: List[List[str]], + batch: List[List[Subtask]] + ): + def _update_subtask(sublist: List[Subtask], enum_identifier: str): + for task in sublist: + if task.data.get_index_string() == enum_identifier: + task.set_status_success() + + grid_id = self.gold_additional.grid_ids[0] + grid_path = self.gold_additional.configuration.protein_data.protein_datafile + + for batch_id in range(len(batch)): + cur_sublist = batch[batch_id] + list_comp_output = tmp_output_paths[batch_id] + + for compound_number in range(len(list_comp_output)): + + # expand the output paths to cover all pose files for this compound + # one input sdf matches to x output sdfs (for x poses) + comp_sdf_output_paths = glob.glob(list_comp_output[compound_number]) + + # loop over all output files + for comp_output in comp_sdf_output_paths: + # this is a protection against the case where empty (file size == 0 bytes) files + # are generated due to a failure during docking + if ( + not os.path.isfile(comp_output) + or 
os.path.getsize(comp_output) == 0 + ): + continue + + mol_supplier = Chem.SDMolSupplier(comp_output, removeHs=False) + for mol in mol_supplier: + if mol is None: + continue + + # The name is something like: 10:0|tmp8x3rtg4d|sdf|1|dock7 + cur_enumeration_name = str(mol.GetProp("_Name")).split('|')[0] + + # add the information on the actual grid used + mol.SetProp(_SBE.ANNOTATION_GRID_ID, str(grid_id)) + mol.SetProp(_SBE.ANNOTATION_GRID_PATH, str(grid_path)) + mol.SetProp(_SBE.ANNOTATION_GRID_FILENAME, os.path.basename(grid_path)) + + # if no docking score is attached (i.e. the molecule is a receptor or so, skip it) + if self._set_docking_score(mol) is not True: + continue + + # add molecule to the appropriate ligand + for compound in self.get_compounds(): + for enumeration in compound: + if enumeration.get_index_string() == cur_enumeration_name: + new_conformer = Conformer( + conformer=mol, + conformer_id=None, + enumeration_object=enumeration, + ) + enumeration.add_conformer(new_conformer, auto_update=True) + _update_subtask( + cur_sublist, enum_identifier=cur_enumeration_name + ) + break + + def _run_subjob(self, output_dir: str, input_paths: List[str]): + # generate the appropriate config; note that input and output paths are referring to one compound each + config_path = os.path.join(output_dir, "gold.config") + self.generate_config_file(path=config_path, ligand_files=input_paths) + + # set up arguments list and execute; change path to temporary sub-folder to avoid cluttering with files + arguments = [ + config_path + ] + old_dir = os.getcwd() + os.chdir(output_dir) + execution_result = self._backend_executor.execute( + command=_CGE.GOLD_CALL, arguments=arguments, check=True + ) + os.chdir(old_dir) + time.sleep(3) + + def _config_from_file(self, ligand_files: List[str]) -> List[str]: + config = [] + with open(self.gold_additional.gold_config_file, 'r') as f: + buffer = f.readlines() + idx = 0 + while idx < len(buffer): + line = buffer[idx].rstrip('\n') + 
config.append(line) + if _SGE.DATA_FILES in line: + # skip over all defined ligand files + while _SGE.LIGAND_DATA_FILE in buffer[idx+1]: + self._logger.log("Skipping pre-defined ligand in configuration file (remove lines starting with \"ligand_data_file\" to suppress this information).", + _LE.DEBUG) + idx = idx + 1 + + # add ligand files as required + for ligand_file in ligand_files: + config.append(' '.join([_SGE.LIGAND_DATA_FILE, ligand_file, "10"])) + idx = idx + 1 + return config + + def _config_from_default(self, ligand_files: List[str]) -> List[str]: + def block_indent(block_name: str) -> str: + return "".join([_SGE.BLOCK_INDENT, block_name]) + + def empty_line() -> str: + return "" + + def add_block(list_lines: List[str], block: dict): + for key, value in block.items(): + if key == _SGE.LIGAND_DATA_FILE: + # this needs to be overwritten by the actual compound collection + self._logger.log(f"Do not set ligand data files explicitly, use the compound handover - skipping.", + _LE.WARNING) + continue + list_lines.append(' '.join([key, '=', str(value)])) + list_lines.append(empty_line()) + + config = [block_indent(_SGE.CONFIGURATION_START), empty_line()] + + # automatic settings + config.append(block_indent(_SGE.AUTOMATIC_SETTINGS)) + add_block(config, self.gold_additional.configuration.automatic_settings.dict()) + + # population + config.append(block_indent(_SGE.POPULATION)) + add_block(config, self.gold_additional.configuration.population.dict()) + + # genetic operators + config.append(block_indent(_SGE.GENETIC_OPERATORS)) + add_block(config, self.gold_additional.configuration.genetic_operators.dict()) + + # flood fill + config.append(block_indent(_SGE.FLOOD_FILL)) + add_block(config, self.gold_additional.configuration.flood_fill.dict()) + + # data files + config.append(block_indent(_SGE.DATA_FILES)) + for ligand_file in ligand_files: + config.append(' '.join([_SGE.LIGAND_DATA_FILE, ligand_file, "10"])) + add_block(config, 
self.gold_additional.configuration.data_files.dict()) + + # flags + config.append(block_indent(_SGE.FLAGS)) + add_block(config, self.gold_additional.configuration.flags.dict()) + + # termination + config.append(block_indent(_SGE.TERMINATION)) + add_block(config, self.gold_additional.configuration.termination.dict()) + + # constraints + config.append(block_indent(_SGE.CONSTRAINTS)) + add_block(config, self.gold_additional.configuration.constraints.dict()) + + # covalent bonding + config.append(block_indent(_SGE.COVALENT_BONDING)) + add_block(config, self.gold_additional.configuration.covalent_bonding.dict()) + + # save options + config.append(block_indent(_SGE.SAVE_OPTIONS)) + add_block(config, self.gold_additional.configuration.save_options.dict()) + + # fitness function settings + config.append(block_indent(_SGE.FITNESS_FUNCTION_SETTINGS)) + add_block(config, self.gold_additional.configuration.fitness_function_settings.dict()) + + # protein data + config.append(block_indent(_SGE.PROTEIN_DATA)) + add_block(config, self.gold_additional.configuration.protein_data.dict()) + + return config + + def generate_config_file(self, path: str, ligand_files: List[str]): + # very complicated custom format for GOLD; see file in "external_documentation/gold.conf" for details + + if self.gold_additional.gold_config_file is None: + # generate config file step by step from default and overwrite set elements + # TODO: constraints are not yet supported (and probably a few other fine-grained settings) + config = self._config_from_default(ligand_files=ligand_files) + else: + # load path to config file and replace "ligand_files" as required; as configuration element is ignored in + # this mode, check that it is not set + if _SGE.CONFIGURATION in self.gold_additional.dict().keys(): + self._logger.log("The \"configuration\" elements are ignored when a config file path is specified.", + _LE.WARNING) + config = self._config_from_file(ligand_files=ligand_files) + + # write out + with 
open(path, 'w') as f: + for line in config: + f.write(line + "\n") + + def execute(self): + # Note: This step only supports one grid at a time, ensemble docking is taken care of at the workflow level! + + # in order to be able to efficiently execute Gold on the enumeration level, all of them have to be unrolled + # Note: As they retain their respective Compound object, the attribution later on is simple + all_enumerations = [] + for compound in self.get_compounds(): + all_enumerations = all_enumerations + compound.get_enumerations() + for enumeration in compound: + enumeration.clear_conformers() + + # split into sublists, according to the settings + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(all_enumerations) + + # execute Gold docking + self._execute_gold() diff --git a/src/icolos/core/workflow_steps/gromacs/base.py b/src/icolos/core/workflow_steps/gromacs/base.py index d049d5e..324c1bf 100644 --- a/src/icolos/core/workflow_steps/gromacs/base.py +++ b/src/icolos/core/workflow_steps/gromacs/base.py @@ -1,4 +1,5 @@ from icolos.core.containers.generic import GenericData +from icolos.core.containers.gromacs_topol import GromacsTopol from icolos.utils.enums.step_enums import StepGromacsEnum from pydantic import BaseModel import os @@ -18,7 +19,7 @@ class StepGromacsBase(StepBase, BaseModel): def __init__(self, **data): super().__init__(**data) - def _write_input_files(self, tmp_dir): + def write_input_files(self, tmp_dir): """ Prepares the tmpdir. 
Supports two modes of operation, depending on where the data has come from: 1) If tmpdir is empty and generic data is not, dump generic data files into tmpdir @@ -81,7 +82,7 @@ def _modify_config_file( ): file_data = config_file.get_data() for key, value in update_dict.items(): - pattern = fr"({key})(\s*=\s*)[a-zA-Z0-9\s\_]*(\s*;)" + pattern = rf"({key})(\s*=\s*)[a-zA-Z0-9\s\_]*(\s*;)" pattern = re.compile(pattern) matches = re.findall(pattern, file_data) if len(matches) == 0: @@ -91,7 +92,7 @@ def _modify_config_file( ) else: - file_data = re.sub(pattern, fr"\1\2 {value} \3", file_data) + file_data = re.sub(pattern, rf"\1\2 {value} \3", file_data) self._logger.log( f"Replaced field {key} of mdp file with value {value}", _LE.DEBUG ) @@ -164,7 +165,7 @@ def construct_pipe_arguments(self, tmp_dir, params) -> str: def _add_index_group(self, tmp_dir, pipe_input): ndx_args_2 = [ "-f", - self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_STRUCTURE), + os.path.join(tmp_dir, _SGE.STD_STRUCTURE), "-o", os.path.join(tmp_dir, _SGE.STD_INDEX), ] @@ -181,3 +182,6 @@ def _add_index_group(self, tmp_dir, pipe_input): ) for line in result.stdout.split("\n"): self._logger_blank.log(line, _LE.INFO) + + def get_topol(self) -> GromacsTopol: + return self.get_workflow_object().workflow_data.gmx_topol diff --git a/src/icolos/core/workflow_steps/gromacs/cluster.py b/src/icolos/core/workflow_steps/gromacs/cluster.py index 0c67453..9106a7f 100644 --- a/src/icolos/core/workflow_steps/gromacs/cluster.py +++ b/src/icolos/core/workflow_steps/gromacs/cluster.py @@ -24,7 +24,7 @@ def __init__(self, **data): def execute(self): tmp_dir = self._make_tmpdir() - self._write_input_files(tmp_dir) + self.write_input_files(tmp_dir) # give the option to run a make_ndx step preceding clustering to facilitate clustering on custom groups if _SGE.INDEX_FLAG in self.settings.arguments.parameters.keys(): diff --git a/src/icolos/core/workflow_steps/gromacs/do_dssp.py 
b/src/icolos/core/workflow_steps/gromacs/do_dssp.py index 825cd92..93ae6cf 100644 --- a/src/icolos/core/workflow_steps/gromacs/do_dssp.py +++ b/src/icolos/core/workflow_steps/gromacs/do_dssp.py @@ -24,7 +24,7 @@ def __init__(self, **data): def execute(self): tmp_dir = self._make_tmpdir() - self._write_input_files(tmp_dir) + self.write_input_files(tmp_dir) structure_file = self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TPR) traj_file = self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_XTC) diff --git a/src/icolos/core/workflow_steps/gromacs/editconf.py b/src/icolos/core/workflow_steps/gromacs/editconf.py index 250f75d..780f3f2 100644 --- a/src/icolos/core/workflow_steps/gromacs/editconf.py +++ b/src/icolos/core/workflow_steps/gromacs/editconf.py @@ -1,3 +1,4 @@ +import os from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum from icolos.utils.enums.program_parameters import GromacsEnum from icolos.utils.execute_external.gromacs import GromacsExecutor @@ -23,12 +24,11 @@ def __init__(self, **data): def execute(self): tmp_dir = self._make_tmpdir() - self._write_input_files(tmp_dir) - structure_file = self.data.generic.get_argument_by_extension( - _SGE.FIELD_KEY_STRUCTURE - ) + topol = self.get_topol() + topol.write_structure(tmp_dir) + self.write_input_files(tmp_dir) arguments = self._parse_arguments( - flag_dict={"-f": structure_file, "-o": structure_file} + flag_dict={"-f": _SGE.STD_STRUCTURE, "-o": _SGE.STD_STRUCTURE} ) pipe_input = ( @@ -51,6 +51,6 @@ def execute(self): self._logger.log( f"Completed execution for {self.step_id} successfully", _LE.INFO ) - + topol.set_structure(tmp_dir) self._parse_output(tmp_dir) self._remove_temporary(tmp_dir) diff --git a/src/icolos/core/workflow_steps/gromacs/genion.py b/src/icolos/core/workflow_steps/gromacs/genion.py index 688e64a..b591ae7 100644 --- a/src/icolos/core/workflow_steps/gromacs/genion.py +++ b/src/icolos/core/workflow_steps/gromacs/genion.py @@ -24,12 +24,14 @@ def 
__init__(self, **data): def execute(self): tmp_dir = self._make_tmpdir() - self._write_input_files(tmp_dir) + topol = self.get_topol() + topol.write_structure(tmp_dir) + topol.write_topol(tmp_dir) + self.write_input_files(tmp_dir) arguments = self._parse_arguments( { - # input file paths are handled internally "-o": _SGE.STD_STRUCTURE, - "-p": self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TOPOL), + "-p": _SGE.STD_TOPOL, "-s": self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TPR), } ) @@ -66,4 +68,6 @@ def execute(self): self._logger.log('Added index group to "index.ndx"', _LE.DEBUG) self._parse_output(tmp_dir) + topol.set_structure(tmp_dir) + topol.set_topol(tmp_dir) self._remove_temporary(tmp_dir) diff --git a/src/icolos/core/workflow_steps/gromacs/grompp.py b/src/icolos/core/workflow_steps/gromacs/grompp.py index 024229e..d044b8d 100644 --- a/src/icolos/core/workflow_steps/gromacs/grompp.py +++ b/src/icolos/core/workflow_steps/gromacs/grompp.py @@ -75,8 +75,9 @@ def execute(self): Note that any issues with your parametrisation or system building will normally cause grompp to panic """ tmp_dir = self._make_tmpdir() - self._write_input_files(tmp_dir) - + topol = self.get_topol() + topol.write_structure(tmp_dir) + topol.write_topol(tmp_dir) # if make_ndx command has been specified in settings.additional, # add an index group here, commonly protein_ligand or protein_other @@ -96,19 +97,18 @@ def execute(self): tmp_dir, self.settings.additional[_SGE.MAKE_NDX_COMMAND] ) - structure_file = self.data.generic.get_argument_by_extension( - _SGE.FIELD_KEY_STRUCTURE + mdp_file = self.data.generic.get_argument_by_extension( + _SGE.FIELD_KEY_MDP, rtn_file_object=True ) - mdp_file = self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_MDP) - topol_file = self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TOPOL) + mdp_file.write(tmp_dir) - args = ["-r", structure_file] if self.settings.additional["-r"] else [] + args = ["-r", 
_SGE.STD_STRUCTURE] if self.settings.additional["-r"] else [] arguments = self._parse_arguments( flag_dict={ - "-f": mdp_file, - "-c": structure_file, - "-p": topol_file, + "-f": mdp_file.get_file_name(), + "-c": _SGE.STD_STRUCTURE, + "-p": _SGE.STD_TOPOL, "-o": _SGE.STD_TPR, }, args=args, diff --git a/src/icolos/core/workflow_steps/gromacs/mdrun.py b/src/icolos/core/workflow_steps/gromacs/mdrun.py index 2e32136..a0e058e 100644 --- a/src/icolos/core/workflow_steps/gromacs/mdrun.py +++ b/src/icolos/core/workflow_steps/gromacs/mdrun.py @@ -45,9 +45,10 @@ def _tail_log_file(self, tmp_dir): def execute(self): tmp_dir = self._make_tmpdir() + topol = self.get_topol() # if we're simulating a protein, we need to modify the topol file to include the correct index groups \ # to allow ligand restraint. This means an ndx file must be specified in the json - self._write_input_files(tmp_dir) + self.write_input_files(tmp_dir) # append _out to the xtc file name xtc_output_file = self.generate_output_file(_SGE.STD_XTC) arguments = self._parse_arguments( @@ -66,4 +67,9 @@ def execute(self): f"Completed execution for {self.step_id} successfully", _LE.INFO ) self._parse_output(tmp_dir) + if "-c" in self.settings.arguments.parameters.keys(): + topol.set_structure(tmp_dir, self.settings.arguments.parameters["-c"]) + else: + topol.set_structure(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/src/icolos/core/workflow_steps/gromacs/mmpbsa.py b/src/icolos/core/workflow_steps/gromacs/mmpbsa.py index 607211c..d4fa008 100644 --- a/src/icolos/core/workflow_steps/gromacs/mmpbsa.py +++ b/src/icolos/core/workflow_steps/gromacs/mmpbsa.py @@ -70,7 +70,12 @@ def _parse_arguments(self, flag_dict: dict) -> List: def _run_mmpbsa(self, args, tmp_dir) -> CompletedProcess: command = _GE.MMPBSA - self._logger.log(f"Executing mmgbsa calculation in dir {tmp_dir}", _LE.DEBUG) + threads = self.get_additional_setting(_SGE.THREADS, default=1) + if threads > 1: + command = f"mpirun -np {threads} " + 
command + self._logger.log( + f"Executing mmgbsa calculation with {threads} thread(s)", _LE.DEBUG + ) result = self._backend_executor.execute( command=command, arguments=args, check=True, location=tmp_dir ) @@ -86,10 +91,7 @@ def _parse_coupling_groups(self, tmp_dir) -> AnyStr: output = [] pipe_input = self.settings.additional[_SGE.COUPLING_GROUPS] - structure = self.data.generic.get_argument_by_extension( - _SGE.FIELD_KEY_STRUCTURE - ) - arguments = ["-f", structure] + arguments = ["-f", _SGE.STD_STRUCTURE] if [f for f in os.listdir(tmp_dir) if f.endswith("ndx")]: arguments.extend(["-n", "index.ndx"]) else: @@ -124,7 +126,9 @@ def execute(self) -> None: tmp_dir = self._make_tmpdir() self._generate_amber_input_file() - self._write_input_files(tmp_dir) + self.get_topol().write_structure(tmp_dir) + self.get_topol().write_topol(tmp_dir) + self.write_input_files(tmp_dir) # gmx_MMPBSA requires the coupling groups of the receptor and ligand @@ -145,7 +149,7 @@ def execute(self) -> None: "-cg": self._parse_coupling_groups(tmp_dir), "-ci": self._get_file_from_dir(tmp_dir=tmp_dir, ext="ndx"), "-ct": self._get_arg("xtc"), - "-cp": self._get_arg("top"), + "-cp": _SGE.STD_TOPOL, # do not attempt to open the results in the GUI afterwards "-nogui": "", } diff --git a/src/icolos/core/workflow_steps/gromacs/pdb2gmx.py b/src/icolos/core/workflow_steps/gromacs/pdb2gmx.py index cd8cd3e..9175c38 100644 --- a/src/icolos/core/workflow_steps/gromacs/pdb2gmx.py +++ b/src/icolos/core/workflow_steps/gromacs/pdb2gmx.py @@ -1,7 +1,9 @@ +import shutil +from icolos.core.containers.gromacs_topol import GromacsTopol from icolos.utils.enums.program_parameters import ( GromacsEnum, ) -from icolos.utils.enums.step_enums import StepGromacsEnum +from icolos.utils.enums.step_enums import StepGromacsEnum, StepOpenFFEnum from pydantic import BaseModel from icolos.core.workflow_steps.gromacs.base import StepGromacsBase from icolos.utils.execute_external.gromacs import GromacsExecutor @@ -10,15 +12,23 @@ 
from icolos.core.workflow_steps.step import _LE import os import re -from typing import AnyStr, List from string import ascii_uppercase from rdkit import Chem +from openmm.app import PDBFile +from parmed.gromacs import GromacsTopologyFile +from openff.toolkit.topology import Molecule, Topology +from openff.toolkit.typing.engines.smirnoff import ForceField +import parmed as pmd _SGE = StepGromacsEnum() _GE = GromacsEnum() +_SOFE = StepOpenFFEnum() class StepGMXPdb2gmx(StepGromacsBase, BaseModel): + class Config: + arbitrary_types_allowed = True + _shell_executor: Executor = None _antechamber_executor: Executor = None _acpype_executor: Executor = None @@ -36,49 +46,6 @@ def __init__(self, **data): self._shell_executor = Executor() self._antechamber_executor = Executor() - def _modify_topol_file(self, tmp_dir, itp_files): - # read in the complex topol file, add the new itp files after the forcefield #include statement - with open(os.path.join(tmp_dir, _SGE.COMPLEX_TOP), "r") as f: - lines = f.readlines() - index = [idx for idx, s in enumerate(lines) if _SGE.FORCEFIELD_ITP in s][0] - new_topol = lines[: index + 1] - for file in itp_files: - new_topol.append(f'#include "{file}"\n') - for line in lines[index + 1 :]: - new_topol.append(line) - for file in itp_files: - stub = file.split(".")[0] - new_topol.append(f"{stub} 1\n") - with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "w") as f: - f.writelines(new_topol) - - # remove all but the final topol file form the paths, makes file handling cleaner later - top_files = [ - f for f in os.listdir(tmp_dir) if f.endswith("top") and f != _SGE.STD_TOPOL - ] - - for f in top_files: - os.remove(os.path.join(tmp_dir, f)) - - def _add_posre_to_topol(self, tmp_dir, lig): - """ - Add lines to topol file to invoke positional restraints for the parametrised ligands - """ - stub = lig.split(".")[0] - lig_itp = stub + ".itp" - with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "r") as f: - lines = f.readlines() - index = [idx for idx, s in 
enumerate(lines) if lig_itp in s][0] - new_topol = lines[: index + 1] - new_topol.append( - f"#ifdef POSRES_{stub.upper()}\n#include posre_{stub}.itp\n#endif\n" - ) - for line in lines[index + 1 :]: - new_topol.append(line) - - with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "w") as f: - f.writelines(new_topol) - def _split_protein_ligand_complex(self, tmp_dir): # split the file into protein and an arbitrary number of ligands and cofactors # Handle for multiple cofactors of the same type @@ -107,10 +74,10 @@ def _split_protein_ligand_complex(self, tmp_dir): # catch cases where ions have non-standard residue names e.g. NA3 elif parts[3][:2] in _GE.IONS and re.findall( - re.compile(fr"{parts[3][:2]}[0-9]+"), line + re.compile(rf"{parts[3][:2]}[0-9]+"), line ): - pattern = fr"{parts[3][:2]}[0-9]+" + pattern = rf"{parts[3][:2]}[0-9]+" pattern = re.compile(pattern) line = re.sub(pattern, parts[3][:2], line) @@ -139,10 +106,14 @@ def _split_protein_ligand_complex(self, tmp_dir): self._remove_temporary(os.path.join(tmp_dir, struct_file)) return list(ligand_lines.keys()) - def _parametrisation_pipeline(self, tmp_dir, input_pdb) -> None: + def _generate_gaff_params( + self, tmp_dir: str, input_pdb: str, topol: GromacsTopol + ) -> None: """ :param tmp_dir: step's base directory :param input_pdb: file name for the ligand being parametrised + + Produces a gmx ITP file for the ligand, and updates topology """ # main pipeline for producing GAFF parameters for a ligand charge_method = self.get_additional_setting( @@ -155,9 +126,12 @@ def _parametrisation_pipeline(self, tmp_dir, input_pdb) -> None: f"Computed formal charge: {formal_charge} for structure {input_pdb}", _LE.DEBUG, ) - + ligand_stub = input_pdb.split(".")[0] # Step 4: run the acpype script to generate the ligand topology file for GAFF - self._logger.log(f"Running acpype on structure {input_pdb}", _LE.DEBUG) + self._logger.log( + f"Running acpype on structure:{input_pdb}, charge method: {charge_method}", + _LE.DEBUG, 
+ ) acpype_args = [ "-di", input_pdb, @@ -174,185 +148,55 @@ def _parametrisation_pipeline(self, tmp_dir, input_pdb) -> None: location=tmp_dir, check=True, ) - # produce the ndx file for genrestr later - index_file = stub + ".ndx" - ndx_arguments = ["-f", input_pdb, "-o", index_file] - self._backend_executor.execute( - command=_GE.MAKE_NDX, - arguments=ndx_arguments, - location=tmp_dir, - check=True, - pipe_input='echo -e "0 & ! a H* \nq"', # all system heavy atoms, excl hydrogens - ) - # generate positional restraints for the ligand - genrestr_args = [ - "-f", - input_pdb, - "-n", - index_file, - "-o", - f"posre_{stub}.itp", - "-fc", - _SGE.FORCE_CONSTANTS, - ] - self._backend_executor.execute( - command=_GE.GENRESTR, - arguments=genrestr_args, - location=tmp_dir, - check=True, - pipe_input="echo 3", - ) # the ligand always be the last thing on the index file + acpype_dir = os.path.join(tmp_dir, f"{ligand_stub}.acpype") + # TODO: refactor this + lig_itp = [ + f + for f in os.listdir(os.path.join(tmp_dir, acpype_dir)) + if f.endswith("GMX.itp") + ][0] + lig_pdb = [ + f + for f in os.listdir(os.path.join(tmp_dir, acpype_dir)) + if f.endswith("pdb") + ][0] - # we no longer need the ligand ndx file - self._remove_temporary(os.path.join(tmp_dir, index_file)) + topol.add_itp(os.path.join(tmp_dir, acpype_dir), [lig_itp], gen_posre=True) + topol.add_molecule(lig_itp.split("_")[0], 1) + topol.append_structure( + os.path.join(tmp_dir, acpype_dir), lig_pdb, sanitize=True + ) - def _sort_components(self, lig_ids: List, components: List): - """ - Ensure components go back into the concatenated pdb file in the same order as the original - """ - new_components = [] - for idx in lig_ids: - for component in components: - if idx in component: - new_components.append(component) - return new_components - - def _concatenate_structures(self, tmp_dir: str, lig_ids: List): - """ - Extract newly parametrised components, concatenate everything into a single pdb file + def 
_generate_openff_params( + self, tmp_dir: str, input_pdb: str, topol: GromacsTopol + ): """ + Generate Amber-compatible Smirnoff params for a single component - components = [] - for root, _, files in os.walk(tmp_dir): - for file in files: - if file.endswith("_NEW.pdb"): - components.append(os.path.join(root, file)) - components = self._sort_components(lig_ids, components) - self._logger.log(f"Found components: {components}", _LE.DEBUG) - with open(os.path.join(tmp_dir, _SGE.PROTEIN_PDB), "r") as f: - pdb_lines = f.readlines() + """ + # TODO: ensure we can handle parameter generation for multiple unique mol types + # get the mols + mols = [Molecule.from_smiles(self.get_additional_setting(_SOFE.UNIQUE_MOLS))] + pdb_file = PDBFile(os.path.join(self.get_additional_setting("lig_pdb"))) + omm_topology = pdb_file.topology - for file in components: - with open(file, "r") as f: + off_topology = Topology.from_openmm(omm_topology, unique_molecules=mols) - pdb_lines.extend(f.readlines()) + forcefield = ForceField(self.get_additional_setting(_SOFE.FORCEFIELD)) - pdb_lines = [ - l for l in pdb_lines if not any(s in l for s in ["TER", "ENDMDL", "REMARK"]) - ] - pdb_lines.extend(["TER\n", "ENDMDL\n"]) - with open(os.path.join(tmp_dir, "Complex.pdb"), "w") as f: - f.writelines(pdb_lines) + omm_system = forcefield.create_openmm_system(off_topology) - # also deal with renaming the itp files here - for root, _, files in os.walk(tmp_dir): - for item in files: - if ( - item.endswith("GMX.itp") - and _SGE.PROTEIN_TOP not in item - and os.path.join(root, item) != os.path.join(tmp_dir, item) - ): - os.rename( - os.path.join(root, item), - os.path.join(tmp_dir, item.split("_")[0]) + ".itp", - ) - # rename the protein top to complex - os.rename( - os.path.join(tmp_dir, _SGE.PROTEIN_TOP), - os.path.join(tmp_dir, _SGE.COMPLEX_TOP), + parmed_struct = pmd.openmm.load_topology( + omm_topology, system=omm_system, xyz=pdb_file.positions ) - - def _extract_atomtype(self, tmp_dir: str, file: str) -> 
List[AnyStr]: - """ - Pull the atomtype lines out of the topol file and return them as a list, write the sanitised itp file to directory - """ - with open(os.path.join(tmp_dir, file), "r") as f: - lines = f.readlines() - start_index = None - stop_index = None - for idx, line in enumerate(lines): - if _GE.ATOMTYPES in line: - start_index = idx - if _GE.MOLECULETYPES in line: - stop_index = idx - - selection = lines[start_index:stop_index] - # remove the offending lines from the topol - remaining = lines[:start_index] - remaining.extend(lines[stop_index:]) - self._remove_temporary(os.path.join(tmp_dir, file)) - with open(os.path.join(tmp_dir, file), "w") as f: - f.writelines(remaining) - return selection - - def _remove_duplicate_atomtypes(self, atomtypes: List): - output = [atomtypes[0]] - for line in atomtypes: - if line not in output: - output.append(line) - return output - - def _modify_itp_files(self, tmp_dir): - # cut the moleculetype directives out of all the individual itp files and add them to the top of the topol - atomtype_lines = [] - # read the topol file, identify all the itp files it is #including - with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "r") as f: - topol_lines = [ - l.split()[-1].strip('"') - for l in f.readlines() - if ".itp" in l and "posre" not in l - ] - topol_lines = [l for l in topol_lines if l in os.listdir(tmp_dir)] - for file in topol_lines: - atomtype_lines.extend(self._extract_atomtype(tmp_dir, file)) - atomtype_lines = self._remove_duplicate_atomtypes(atomtype_lines) - - # write an 'atomtypes.itp' files to be included just below the forcefield, with all the atomtypes contained in the extra components - with open(os.path.join(tmp_dir, "atomtypes.itp"), "w") as f: - f.writelines(atomtype_lines) - - with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "r") as f: - lines = f.readlines() - self._remove_temporary(os.path.join(tmp_dir, _SGE.STD_TOPOL)) - index = [idx for idx, s in enumerate(lines) if _SGE.FORCEFIELD_ITP in s][0] - 
new_topol = lines[: index + 1] - - new_topol.append('#include "atomtypes.itp"\n') - new_topol.extend(lines[index + 1 :]) - with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "w") as f: - f.writelines(new_topol) - - def _modify_water_molecules(self, tmp_dir: str): - with open(os.path.join(tmp_dir, _SGE.COMPLEX_PDB), "r") as f: - lines = f.readlines() - - solvent = [] - # pick out the water lines - for line in lines: - if any([x in line for x in _GE.SOLVENTS]): - solvent.append(line) - for line in solvent: - lines.remove(line) - lines.extend(solvent) - for line in lines: - if any([x in line for x in _GE.TERMINATIONS]): - lines.remove(line) - - with open(os.path.join(tmp_dir, _SGE.COMPLEX_PDB), "w") as f: - f.writelines(lines) - - if solvent: - # modify the topol to put the solvent in last in the [ molecules ] directive - with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "r") as f: - lines = f.readlines() - molecule_idx = lines.index(_GE.MOLECULES) - for line in lines[molecule_idx:]: - if any([x in line for x in _GE.SOLVENTS]): - out = lines.pop(lines.index(line)) - lines.append(out) - with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "w") as f: - f.writelines(lines) + parmed_struct.save(os.path.join(tmp_dir, "MOL.top"), overwrite=True) + parmed_struct.save(os.path.join(tmp_dir, "MOL.pdb"), overwrite=True) + gmx_top = GromacsTopologyFile().from_structure(parmed_struct) + gmx_top.write(os.path.join(tmp_dir, "MOL.itp"), itp=True) + topol.add_itp(path=tmp_dir, files=["MOL.itp"]) + topol.generate_posre(path=tmp_dir, itp_file="MOL.itp") + topol.append_structure(path=tmp_dir, file="MOL.pdb", sanitize=True) def execute(self): """Takes in a ligand pdb file and generates the required topology, based on the backend specified in the config json. 
@@ -361,17 +205,14 @@ def execute(self): Execution looks like this currently: (1) split the protein from the other components (2) generate the topology for the protein separately - (3) identify components to be parametrised (cofactors, ligands etc) - (4) run the parametrisation pipeline on each component in serial (reasonably fast exec time per ligand) - (4a) store the resIDs of the ligands using the file handling system to be retrieved in a later step - (5) modify the topology file to add the #include statements for the relevant itp files - (6) convert the resulting concatenated pdb file to .gro with editconf - (7) add the posres stuff to the topol file for each ligand for the subsequent equilibration steps - (8) if more than one ligand, modify the itp files to ensure all moleculetype directives are specified first + (4) run the parametrisation pipeline on each HET component in serial, generating either GAFF or SMIRNOFF params + (5) combine the topologies + """ tmp_dir = self._make_tmpdir() - self._write_input_files(tmp_dir) # dump generic data fields to the tmpdir + topol = self.get_topol() + self.write_input_files(tmp_dir) lig_ids = self._split_protein_ligand_complex(tmp_dir) self._logger.log( f"Parameters will be generated for the following components: {str(lig_ids)}", @@ -390,62 +231,40 @@ def execute(self): self._backend_executor.execute( command=_GE.PDB2GMX, arguments=arguments_pdb2gmx, location=tmp_dir ) + # instantiate a separate topology object that will do a beter job of writing out than parmed can + + topol.forcefield = self.settings.arguments.parameters["-ff"] + topol.water = self.settings.arguments.parameters["-water"] + topol.parse(os.path.join(tmp_dir, _SGE.PROTEIN_TOP)) + topol.set_structure(tmp_dir, _SGE.PROTEIN_PDB, sanitize=True) + posre_files = [ + f for f in os.listdir(tmp_dir) if f.endswith(".itp") and "posre" in f + ] + topol.add_posre(tmp_dir, posre_files) + param_method = self.get_additional_setting(_SGE.PARAM_METHOD, default=_SGE.GAFF) 
for lig in lig_ids: input_file = lig + ".pdb" - # generate the itp files for each component, named by their PDB identifier - self._parametrisation_pipeline(tmp_dir, input_file) - - # concatenate the structures to produce Complex.pdb - if lig_ids: - self._concatenate_structures(tmp_dir, lig_ids) - # step 6: Modify protein topol file for ligand - itp_files = [ - f - for f in os.listdir(tmp_dir) - if f.endswith(".itp") - and "posre" not in f - and not any( - [x in f for x in _GE.PRIMARY_COMPONENTS] - ) # avoid any duplicated itp file entries from components of the protein already handles by pdb2gmx (TODO: makes sure this works for DNA/RNA as well) - ] - # need to sort the itp files to match the ordering from the original pdb structure - itp_files = self._sort_components(lig_ids, itp_files) - self._modify_topol_file(tmp_dir, itp_files) - - # step 10: modify the topol file to add the ligand posre file if restraints are applied - for lig in lig_ids: - self._add_posre_to_topol(tmp_dir, lig) - - # if more than two ligands present, modify the ligand itp files so all the [atomtype] directives come before the [moleculetype] directives in the full topol - if len(lig_ids) > 1: - self._modify_itp_files(tmp_dir) - - else: - # just convert the file names in place, no addition of ligands - os.rename( - os.path.join(tmp_dir, _SGE.PROTEIN_TOP), - os.path.join(tmp_dir, _SGE.STD_TOPOL), - ) - os.rename( - os.path.join(tmp_dir, _SGE.PROTEIN_PDB), - os.path.join(tmp_dir, _SGE.COMPLEX_PDB), - ) - - # step 7: run editconf to convert the combined pdb to a gro file - - # do final check to move crystallographic waters to the end of the pdb file, after - # the ligand, to ensure continuous solvent group later - self._modify_water_molecules(tmp_dir) - # and adjust the topol file to put any solvent last - - editconf_arguments = ["-f", _SGE.COMPLEX_PDB, "-o", "structure.gro"] + if param_method == _SGE.GAFF: + # generate the itp files for each component, named by their PDB identifier + 
self._generate_gaff_params(tmp_dir, input_file, topol) + elif param_method == _SGE.OPENFF: + self._generate_openff_params(tmp_dir, input_file, topol) + else: + raise NotImplementedError + + os.remove(os.path.join(tmp_dir, _SGE.PROTEIN_TOP)) + os.remove(os.path.join(tmp_dir, _SGE.PROTEIN_PDB)) + # final writeout of parametrized system + topol.write_structure(tmp_dir, _SGE.COMPLEX_PDB) + # convert pdb to gro + editconf_arguments = ["-f", _SGE.COMPLEX_PDB, "-o", _SGE.STD_STRUCTURE] self._backend_executor.execute( command=_GE.EDITCONF, arguments=editconf_arguments, location=tmp_dir, check=True, ) - + topol.set_structure(tmp_dir) self._parse_output(tmp_dir) self._remove_temporary(tmp_dir) diff --git a/src/icolos/core/workflow_steps/gromacs/rsmd.py b/src/icolos/core/workflow_steps/gromacs/rsmd.py index 10201d8..0edc1ef 100644 --- a/src/icolos/core/workflow_steps/gromacs/rsmd.py +++ b/src/icolos/core/workflow_steps/gromacs/rsmd.py @@ -27,7 +27,7 @@ def execute(self): # compare against. rmsd is computed for every step of the trj file # write out generic files - self._write_input_files(tmp_dir) + self.write_input_files(tmp_dir) # conformer coming from a Compound object conf = self._unroll_compounds(self.data.compounds) diff --git a/src/icolos/core/workflow_steps/gromacs/select.py b/src/icolos/core/workflow_steps/gromacs/select.py index 76efee1..d2f1f5f 100644 --- a/src/icolos/core/workflow_steps/gromacs/select.py +++ b/src/icolos/core/workflow_steps/gromacs/select.py @@ -24,7 +24,7 @@ def execute(self): tmp_dir = self._make_tmpdir() # write out generic files - self._write_input_files(tmp_dir) + self.write_input_files(tmp_dir) flag_dict = { "-s": self.data.generic.get_argument_by_extension("tpr"), diff --git a/src/icolos/core/workflow_steps/gromacs/solvate.py b/src/icolos/core/workflow_steps/gromacs/solvate.py index 5de9df9..2b03685 100644 --- a/src/icolos/core/workflow_steps/gromacs/solvate.py +++ b/src/icolos/core/workflow_steps/gromacs/solvate.py @@ -22,15 +22,15 @@ def 
__init__(self, **data): def execute(self): tmp_dir = self._make_tmpdir() - self._write_input_files(tmp_dir) - structure_file = self.data.generic.get_argument_by_extension( - _SGE.FIELD_KEY_STRUCTURE - ) + topol = self.get_topol() + topol.write_structure(tmp_dir) + topol.write_topol(tmp_dir) + arguments = self._parse_arguments( flag_dict={ - "-cp": structure_file, - "-p": self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TOPOL), - "-o": structure_file, + "-cp": _SGE.STD_STRUCTURE, + "-p": _SGE.STD_TOPOL, + "-o": _SGE.STD_STRUCTURE, } ) result = self._backend_executor.execute( @@ -41,5 +41,7 @@ def execute(self): self._logger.log( f"Completed execution for {self.step_id} successfully.", _LE.INFO ) - self._parse_output(tmp_dir) + # self._parse_output(tmp_dir) + topol.set_structure(tmp_dir) + topol.set_topol(tmp_dir) self._remove_temporary(tmp_dir) diff --git a/src/icolos/core/workflow_steps/gromacs/trjconv.py b/src/icolos/core/workflow_steps/gromacs/trjconv.py index bafa0ba..9dc4ddc 100644 --- a/src/icolos/core/workflow_steps/gromacs/trjconv.py +++ b/src/icolos/core/workflow_steps/gromacs/trjconv.py @@ -25,7 +25,7 @@ def __init__(self, **data): def execute(self): tmp_dir = self._make_tmpdir() - self._write_input_files(tmp_dir) + self.write_input_files(tmp_dir) xtc_file = self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_XTC) flag_dict = { diff --git a/src/icolos/core/workflow_steps/openff/__init__.py b/src/icolos/core/workflow_steps/openff/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/icolos/core/workflow_steps/openff/openff2gmx.py b/src/icolos/core/workflow_steps/openff/openff2gmx.py new file mode 100644 index 0000000..76132ac --- /dev/null +++ b/src/icolos/core/workflow_steps/openff/openff2gmx.py @@ -0,0 +1,68 @@ +from icolos.core.workflow_steps.step import StepBase +from pydantic import BaseModel +from openmm.app import PDBFile + +import parmed +from openff.toolkit.topology import Molecule, Topology +from 
openff.toolkit.typing.engines.smirnoff import ForceField +from openff.toolkit.utils import get_data_file_path + +from icolos.utils.enums.step_enums import StepOpenFFEnum +import os + +# Note that this implementation leverages parmed for now, although will likely move over to the OpenFF Interchange tooling once stable +_SOFE = StepOpenFFEnum() + +# TOOD: Eventually this step should be able to do a pdb2gmx job +class StepOFF2gmx(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + def parametrise_mols(self, tmp_dir: str): + """ + Generate parameters for each mol + """ + # TODO: do we want to throw everything together or split the params into separate files? + mols = [ + Molecule.from_smiles(smi) + for smi in self.get_additional_setting(_SOFE.UNIQUE_MOLS) + ] + pdb_file = PDBFile( + os.path.join(tmp_dir, self.data.generic.get_argument_by_extension("pdb")) + ) + omm_topology = pdb_file.topology + + off_topology = Topology.from_openmm(omm_topology, unique_molecules=mols) + + forcefield = ForceField(self.get_additional_setting(_SOFE.FORCEFIELD)) + + omm_system = forcefield.create_openmm_system(off_topology) + + if self.get_additional_setting(_SOFE.METHOD, _SOFE.PARMED) == _SOFE.PARMED: + parmed_struct = parmed.openmm.load_topology( + omm_topology, omm_system, pdb_file.positions + ) + parmed_struct.save(os.path.join(tmp_dir, "MOL.top"), overwrite=True) + parmed_struct.save(os.path.join(tmp_dir, "MOL.gro"), overwrite=True) + + # TODO: validate energy differences + + elif self.get_additional_setting(_SOFE.METHOD) == _SOFE.INTERCHANGE: + raise NotImplementedError + else: + raise NotImplementedError + + def execute(self): + """ + Builds a system and parametrise using OpenFF SAGE params, then convert to a GROMACS top/gro format for downstream simulation + """ + tmp_dir = self._make_tmpdir() + self.data.generic.write_out_all_files(tmp_dir) + + self.parametrise_mols(tmp_dir) + + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) + + +# If 
we want to build OpenFF params instead of gaff, we would need to make a call to a different parametrisation pipeline, then load the protein into a ParmEd System, combine with the OpenFF-built system and combine into a gro/top file. diff --git a/src/icolos/core/workflow_steps/pmx/assemble_systems.py b/src/icolos/core/workflow_steps/pmx/assemble_systems.py index 0fc583a..444e243 100644 --- a/src/icolos/core/workflow_steps/pmx/assemble_systems.py +++ b/src/icolos/core/workflow_steps/pmx/assemble_systems.py @@ -25,7 +25,7 @@ def execute(self): assert self.work_dir is not None and os.path.isdir(self.work_dir) # get edges from the perturbation map attached to the step - edges = self.get_edges() + edges = [e.get_edge_id() for e in self.get_edges()] # enforce one edge per task list (results in multiple batches for large maps) self.execution.parallelization.max_length_sublists = 1 @@ -34,13 +34,15 @@ def execute(self): ) self._subtask_container.load_data(edges) self._execute_pmx_step_parallel( - run_func=self._execute_command, step_id="pmx assemble_systems" + run_func=self._execute_command, + step_id="pmx assemble_systems", + result_checker=self._check_results, ) def _execute_command(self, jobs: List): args = { - "-edges": '"' + " ".join([e.get_edge_id() for e in jobs]) + '"', + "-edges": '"' + " ".join([e for e in jobs]) + '"', "-ligand_path": os.path.join(self.work_dir, _PAE.LIGAND_DIR), "-workPath": self.work_dir, } @@ -50,3 +52,22 @@ def _execute_command(self, jobs: List): check=True, location=self.work_dir, ) + + def _check_results(self, batch: List[List[str]]) -> List[List[bool]]: + output_files = ["ffmerged.itp"] + results = [] + for subjob in batch: + subjob_results = [] + for job in subjob: + subjob_results.append( + all( + [ + os.path.isfile( + os.path.join(self.work_dir, job, "hybridStrTop", f) + ) + for f in output_files + ] + ) + ) + results.append(subjob_results) + return results diff --git a/src/icolos/core/workflow_steps/pmx/atomMapping.py 
b/src/icolos/core/workflow_steps/pmx/atomMapping.py index dfbf757..e16894d 100644 --- a/src/icolos/core/workflow_steps/pmx/atomMapping.py +++ b/src/icolos/core/workflow_steps/pmx/atomMapping.py @@ -39,11 +39,12 @@ def _prepare_arguments(self, args, output_dir): prepared_args.append(value) return prepared_args - def _execute_command(self, jobs: List[Edge]): - assert isinstance(jobs, list) + def _execute_command(self, jobs: List[str]): + edge = jobs[0] - lig1 = edge.get_source_node_name() - lig2 = edge.get_destination_node_name() + parts = edge.split("_") + lig1 = parts[0] + lig2 = parts[1] # write them to the right dir as a pdb from the outset arguments = { "-i1": os.path.join( @@ -59,7 +60,7 @@ def _execute_command(self, jobs: List[Edge]): "MOL.pdb", ), } - output_dir = os.path.join(self.work_dir, edge.get_edge_id(), _PE.HYBRID_STR_TOP) + output_dir = os.path.join(self.work_dir, edge, _PE.HYBRID_STR_TOP) arguments = self._prepare_arguments(args=arguments, output_dir=output_dir) result = self._backend_executor.execute( @@ -73,7 +74,7 @@ def execute(self): # check the workflow has been configured correctly to use a shared work_dir assert self.work_dir is not None and os.path.isdir(self.work_dir) - edges = self.get_edges() + edges = [e.get_edge_id() for e in self.get_edges()] # enforce single edge per job queue self.execution.parallelization.max_length_sublists = 1 self._subtask_container = SubtaskContainer( @@ -99,7 +100,7 @@ def _check_result(self, batch: List[List[Edge]]) -> List[List[bool]]: all( [ os.path.isfile( - os.path.join(job.get_edge_id(), "hybridStrTop", f) + os.path.join(self.work_dir, job, "hybridStrTop", f) ) for f in output_files ] diff --git a/src/icolos/core/workflow_steps/pmx/base.py b/src/icolos/core/workflow_steps/pmx/base.py index 4f021fe..371d484 100644 --- a/src/icolos/core/workflow_steps/pmx/base.py +++ b/src/icolos/core/workflow_steps/pmx/base.py @@ -3,10 +3,13 @@ from subprocess import CompletedProcess from typing import Callable, Dict, 
List from pydantic import BaseModel +from icolos.core.containers.compound import Compound, Conformer +from icolos.core.containers.perturbation_map import Edge, Node, PerturbationMap from rdkit.Chem import rdmolops from icolos.core.containers.compound import Compound, Conformer from icolos.core.containers.perturbation_map import Node, PerturbationMap from icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.parallelization import ParallelizationEnum from icolos.utils.enums.program_parameters import GromacsEnum, StepPMXEnum from icolos.utils.enums.step_enums import StepGromacsEnum from icolos.utils.execute_external.execute import Executor @@ -16,10 +19,12 @@ from icolos.core.workflow_steps.step import _LE import shutil import glob +from rdkit.Chem import rdmolops _GE = GromacsEnum() _SGE = StepGromacsEnum() _SPE = StepPMXEnum() +_PE = ParallelizationEnum class StepPMXBase(StepBase, BaseModel): @@ -196,7 +201,7 @@ def _clean_pdb_structure(self, tmp_dir: str) -> None: f.writelines(cleaned_lines) def _parametrisation_pipeline( - self, tmp_dir, conf: Conformer = None, include_top=False, include_gro=False + self, tmp_dir, conf: Conformer, include_top=False, include_gro=False ): # main pipeline for producing GAFF parameters for a ligand charge_method = self.get_additional_setting( @@ -205,10 +210,6 @@ def _parametrisation_pipeline( formal_charge = ( rdmolops.GetFormalCharge(conf.get_molecule()) if conf is not None else 0 ) - self._logger.log( - f"Formal charge for ligand {conf.get_compound_name()}: {formal_charge}", - _LE.DEBUG, - ) arguments_acpype = [ "-di", "MOL.pdb", @@ -295,17 +296,27 @@ def _execute_pmx_step_parallel( parallelizer.execute_parallel(jobs=jobs, **kwargs) - # # TODO: find a reliable way to sort this, ideally by inspecting log files if result_checker is not None: batch_results = result_checker(jobs) good_results = 0 for task, result in zip(next_batch, batch_results): + # returns boolean arrays: False => failed job for subtask, 
sub_result in zip(task, result): - if sub_result: + if not sub_result: subtask.set_status_failed() self._logger.log( f"Warning: job {subtask} failed!", _LE.WARNING ) + if ( + self.get_perturbation_map().strict_execution + and isinstance(subtask.data, str) + ): + edge = self.get_perturbation_map().get_edge_by_id( + subtask.data + ) + # propagate the failure to the perturbation map so this edge is skipped downstream + edge._set_status(_PE.STATUS_FAILED) + self._logger.log(f"Set edge {subtask.data} status to failed", _LE.DEBUG) else: subtask.set_status_success() good_results += 1 @@ -316,7 +327,7 @@ else: self._logger.log( - f"Warning: Return codes for step {step_id} are not being checked!", + f"Step {step_id} is running without a result checker - failed executions will be ignored!", _LE.WARNING, ) for element in next_batch: @@ -413,6 +424,7 @@ def _construct_perturbation_map(self, work_dir: str, replicas: int): "pdb", rtn_file_object=True ), replicas=replicas, + strict_execution=self.get_additional_setting(_SPE.STRICT, default=True), ) perturbation_map.parse_map_file( os.path.join(self.work_dir, log_file.get_file_name()) diff --git a/src/icolos/core/workflow_steps/pmx/ligandHybrid.py b/src/icolos/core/workflow_steps/pmx/ligandHybrid.py index f793866..1f71600 100644 --- a/src/icolos/core/workflow_steps/pmx/ligandHybrid.py +++ b/src/icolos/core/workflow_steps/pmx/ligandHybrid.py @@ -21,10 +21,11 @@ def __init__(self, **data): super().__init__(**data) self._initialize_backend(PMXExecutor) - def _execute_command(self, jobs: List[Edge]): + def _execute_command(self, jobs: List[str]): edge = jobs[0] - lig1 = edge.get_source_node_name() - lig2 = edge.get_destination_node_name() + parts = edge.split("_") + lig1 = parts[0] + lig2 = parts[1] arguments = { "-i1": os.path.join( @@ -43,7 +44,7 @@ def _execute_command(self, jobs: List[Edge]): "-itp2": os.path.join(self.work_dir, _PAE.LIGAND_DIR, lig2, "MOL.itp"), } # write output files the hybridStrTop directory for each edge - output_dir = os.path.join(self.work_dir, edge.get_edge_id(), 
_PE.HYBRID_STR_TOP) + output_dir = os.path.join(self.work_dir, edge, _PE.HYBRID_STR_TOP) arguments = self._prepare_arguments(args=arguments, output_dir=output_dir) self._backend_executor.execute( command=_PE.LIGANDHYBRID, @@ -82,14 +83,8 @@ def _prepare_arguments(self, args, output_dir): def execute(self): assert self.work_dir is not None and os.path.isdir(self.work_dir) - edges = self.get_edges() - total_edges = len(edges) - # for idx, edge in enumerate(edges): - # progress = np.round(idx / total_edges * 100, 2) - # self._logger.log( - # f"Executing pmx ligandHybrid for edge {edge.get_edge_id()} - {progress}% complete", - # _LE.DEBUG, - # ) + edges = [e.get_edge_id() for e in self.get_edges()] + self.execution.parallelization.max_length_sublists = 1 self._subtask_container = SubtaskContainer( max_tries=self.execution.failure_policy.n_tries @@ -101,7 +96,7 @@ def execute(self): result_checker=self._check_result, ) - def _check_result(self, batch: List[List[Edge]]) -> List[List[bool]]: + def _check_result(self, batch: List[List[str]]) -> List[List[bool]]: """ Look in each hybridStrTop dir and check the output pdb files exist for the edges """ @@ -114,7 +109,7 @@ def _check_result(self, batch: List[List[Edge]]) -> List[List[bool]]: all( [ os.path.isfile( - os.path.join(job.get_edge_id(), "hybridStrTop", f) + os.path.join(self.work_dir, job, "hybridStrTop", f) ) for f in output_files ] diff --git a/src/icolos/core/workflow_steps/pmx/prepare_simulations.py b/src/icolos/core/workflow_steps/pmx/prepare_simulations.py index 63858ac..d581343 100644 --- a/src/icolos/core/workflow_steps/pmx/prepare_simulations.py +++ b/src/icolos/core/workflow_steps/pmx/prepare_simulations.py @@ -120,7 +120,12 @@ def _check_result(self, batch: List[List[str]]) -> List[List[bool]]: subjob_results = [] for job in subjob: subjob_results.append( - all([os.path.isfile(os.path.join(job, f)) for f in output_files]) + all( + [ + os.path.isfile(os.path.join(self.work_dir, job, f)) + for f in 
output_files + ] + ) ) results.append(subjob_results) return results diff --git a/src/icolos/core/workflow_steps/pmx/prepare_transitions.py b/src/icolos/core/workflow_steps/pmx/prepare_transitions.py index c85287e..c1f0deb 100644 --- a/src/icolos/core/workflow_steps/pmx/prepare_transitions.py +++ b/src/icolos/core/workflow_steps/pmx/prepare_transitions.py @@ -35,7 +35,9 @@ def execute(self): ) self._subtask_container.load_data(edges) self._execute_pmx_step_parallel( - run_func=self.prepare_transitions, step_id="pmx prepare_transitions" + run_func=self.prepare_transitions, + step_id="pmx prepare_transitions", + result_checker=self._check_result, ) def _extract_snapshots(self, eqpath, tipath): @@ -136,7 +138,12 @@ def _check_result(self, batch: List[List[str]]) -> List[List[bool]]: subjob_results = [] for job in subjob: subjob_results.append( - all([os.path.isfile(os.path.join(job, f)) for f in output_files]) + all( + [ + os.path.isfile(os.path.join(self.work_dir, job, f)) + for f in output_files + ] + ) ) results.append(subjob_results) return results diff --git a/src/icolos/core/workflow_steps/pmx/run_analysis.py b/src/icolos/core/workflow_steps/pmx/run_analysis.py index 09811fe..41ecb24 100644 --- a/src/icolos/core/workflow_steps/pmx/run_analysis.py +++ b/src/icolos/core/workflow_steps/pmx/run_analysis.py @@ -41,7 +41,9 @@ def execute(self): ) self._subtask_container.load_data(edges) self._execute_pmx_step_parallel( - run_func=self.run_analysis, step_id="pmx_run_analysis" + run_func=self.run_analysis, + step_id="pmx_run_analysis", + result_checker=self._check_result, ) self.analysis_summary(edges) @@ -309,7 +311,9 @@ def _check_result(self, batch: List[List[str]]) -> List[List[bool]]: subjob_results.append( all( [ - os.path.isfile(os.path.join(job, "analyse1", f)) + os.path.isfile( + os.path.join(self.work_dir, job, "analyse1", f) + ) for f in output_files ] ) diff --git a/src/icolos/core/workflow_steps/pmx/run_simulations.py 
b/src/icolos/core/workflow_steps/pmx/run_simulations.py index 25643d5..3eea0fa 100644 --- a/src/icolos/core/workflow_steps/pmx/run_simulations.py +++ b/src/icolos/core/workflow_steps/pmx/run_simulations.py @@ -1,7 +1,4 @@ -from importlib.resources import path -from subprocess import CompletedProcess -from typing import Dict, List -from icolos.core.containers.perturbation_map import Edge +from typing import List from icolos.core.workflow_steps.pmx.base import StepPMXBase from pydantic import BaseModel from icolos.core.workflow_steps.step import _LE @@ -210,6 +207,8 @@ def _inspect_log_files(self, jobs: List[str]) -> List[List[bool]]: location = os.path.join("/".join(sim.split("/")[:-1]), "md.log") with open(location, "r") as f: lines = f.readlines() - subtask_results.append(any(["Finished mdrun" in l for l in lines])) + subtask_results.append( + any(["Finished mdrun" in l for l in lines[-20:]]) + ) results.append(subtask_results) return results diff --git a/src/icolos/core/workflow_steps/pmx/setup_workpath.py b/src/icolos/core/workflow_steps/pmx/setup_workpath.py index bd9681e9..ff53c27 100644 --- a/src/icolos/core/workflow_steps/pmx/setup_workpath.py +++ b/src/icolos/core/workflow_steps/pmx/setup_workpath.py @@ -1,7 +1,7 @@ -from icolos.core.containers.perturbation_map import Node import os -from typing import Dict +from typing import List from pydantic import BaseModel +from icolos.core.containers.perturbation_map import Node from icolos.core.workflow_steps.pmx.base import StepPMXBase from icolos.utils.enums.program_parameters import GromacsEnum from icolos.utils.enums.step_enums import StepGromacsEnum @@ -83,6 +83,7 @@ def execute(self): self._execute_pmx_step_parallel( run_func=self._parametrise_nodes, step_id="pmx_setup", + result_checker=self._check_results, ) # create the output folder structure @@ -113,3 +114,27 @@ def execute(self): for sim in self.sim_types: simpath = f"{runpath}/{sim}".format(runpath, sim) os.makedirs(simpath, exist_ok=True) + + def 
_check_results(self, batch: List[List[Node]]) -> List[List[bool]]: + output_files = ["ffMOL.itp", "MOL.itp"] + results = [] + for subjob in batch: + subjob_results = [] + for job in subjob: + subjob_results.append( + all( + [ + os.path.isfile( + os.path.join( + self.work_dir, + "input", + job.get_node_id(), + f, + ) + ) + for f in output_files + ] + ) + ) + results.append(subjob_results) + return results diff --git a/src/icolos/core/workflow_steps/schrodinger/base.py b/src/icolos/core/workflow_steps/schrodinger/base.py index 7a1ba8f..2244f9a 100644 --- a/src/icolos/core/workflow_steps/schrodinger/base.py +++ b/src/icolos/core/workflow_steps/schrodinger/base.py @@ -83,9 +83,9 @@ def _translate_PDB_to_MAE( def _replace_config_value(self, key, value, config): value = str(value) - pattern = fr"({key} =).*" + pattern = rf"({key} =).*" pattern = re.compile(pattern) - config = re.sub(pattern, fr"\1 {value}", config) + config = re.sub(pattern, rf"\1 {value}", config) return config def _get_template(self, file_name): diff --git a/src/icolos/core/workflow_steps/step.py b/src/icolos/core/workflow_steps/step.py index 8f6b853..3473cd6 100644 --- a/src/icolos/core/workflow_steps/step.py +++ b/src/icolos/core/workflow_steps/step.py @@ -508,7 +508,7 @@ def _log_result(self, result: CompletedProcess): for line in result.stdout.split("\n"): self._logger_blank.log(line, _LE.DEBUG) - def get_additional_setting(self, key: str, default: str): + def get_additional_setting(self, key: str, default: str = None): """ Query settings.additional with the key, if not set use the default """ diff --git a/src/icolos/scripts/cli.py b/src/icolos/scripts/cli.py deleted file mode 100644 index 8331502..0000000 --- a/src/icolos/scripts/cli.py +++ /dev/null @@ -1,103 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import json -import argparse - -from icolos.core.composite_agents.workflow import WorkFlow - -from icolos.loggers.entrypoint_logger import EntryPointLogger - -from 
icolos.utils.enums.composite_agents_enums import WorkflowEnum -from icolos.utils.enums.logging_enums import LoggingConfigEnum -from icolos.utils.enums.entry_points import ExecutorEnum - -from icolos.utils.entry_point_functions.logging_helper_functions import ( - initialize_logging, -) -from icolos.utils.entry_point_functions.parsing_functions import parse_header -from icolos.utils.general.files_paths import attach_root_path - - -class IcolosCLI: - def __init__(self) -> None: - # enums - _LE = LoggingConfigEnum() - _EE = ExecutorEnum() - _WE = WorkflowEnum() - - # initialize logger - logger = EntryPointLogger() - - # get the input parameters and parse them - parser = argparse.ArgumentParser( - description='Implements entry point for the "Icolos" workflow class.' - ) - parser.add_argument( - "-conf", - type=str, - default=None, - help="A path to an workflow's configuration file (JSON dictionary) that is to be executed.", - ) - parser.add_argument( - "-debug", - action="store_true", - help='Set this flag to activate the inbuilt debug logging mode (this will overwrite parameter "-log_conf", if set).', - ) - parser.add_argument( - "--global_variables", - nargs="+", - default=None, - type=str, - help='List of strings, setting global variables with key and value, e.g. "root:/path/to/root".', - ) - parser.add_argument( - "--global_settings", - nargs="+", - default=None, - type=str, - help='List of strings, setting global settings with key and value, e.g. "remove_temporary:False".', - ) - args, args_unk = parser.parse_known_args() - - if args.conf is None or not os.path.isfile(args.conf): - raise Exception( - 'Parameter "-conf" must be a relative or absolute path to a configuration (JSON) file.' 
- ) - - # load configuration - with open(args.conf) as file: - conf = file.read().replace("\r", "").replace("\n", "") - conf = json.loads(conf) - - # set the logging configuration according to parameters - log_conf = attach_root_path(_LE.PATH_CONFIG_DEFAULT) - if args.debug: - log_conf = attach_root_path(_LE.PATH_CONFIG_DEBUG) - logger = initialize_logging(log_conf_path=log_conf, workflow_conf=conf) - - # update global variables and settings - conf = parse_header( - conf=conf, - args=args, - entry_point_path=os.path.realpath(__file__), - logger=logger, - ) - - # generate workflow object - workflow = WorkFlow(**conf[_WE.WORKFLOW]) - workflow.initialize() - - # execute the whole workflow - workflow.execute() - - sys.exit(0) - - -def entry_point(): - IcolosCLI() - - -if __name__ == "__main__": - entry_point() diff --git a/src/icolos/scripts/executor.py b/src/icolos/scripts/executor.py index a52ba64..ca59301 100644 --- a/src/icolos/scripts/executor.py +++ b/src/icolos/scripts/executor.py @@ -19,6 +19,7 @@ initialize_logging, ) from icolos.utils.entry_point_functions.parsing_functions import parse_header +from icolos.utils.general.citation_generator import print_citations from icolos.utils.general.files_paths import attach_root_path @@ -82,11 +83,11 @@ def main(): conf = parse_header( conf=conf, args=args, entry_point_path=os.path.realpath(__file__), logger=logger ) - # generate workflow object workflow = WorkFlow(**conf[_WE.WORKFLOW]) workflow.initialize() + print_citations(workflow._initialized_steps) # execute the whole workflow st_time = datetime.now() workflow.execute() diff --git a/src/icolos/scripts/integration_tests.py b/src/icolos/scripts/integration_tests.py deleted file mode 100644 index a57f659..0000000 --- a/src/icolos/scripts/integration_tests.py +++ /dev/null @@ -1,5 +0,0 @@ -from tests.integration_tests import * - - -if __name__ == "__main__": - unittest.main() diff --git a/src/icolos/utils/enums/execution_enums.py 
b/src/icolos/utils/enums/execution_enums.py index 0b2574d..f0941c1 100644 --- a/src/icolos/utils/enums/execution_enums.py +++ b/src/icolos/utils/enums/execution_enums.py @@ -11,3 +11,4 @@ class ExecutionPlatformEnum(str, Enum): CORES = "cores" CORE = "core" GPU = "gpu" + SERIAL = "serial" diff --git a/src/icolos/utils/enums/flow_control_enums.py b/src/icolos/utils/enums/flow_control_enums.py index 50b7093..ddeb693 100644 --- a/src/icolos/utils/enums/flow_control_enums.py +++ b/src/icolos/utils/enums/flow_control_enums.py @@ -1,6 +1,6 @@ +from icolos.core.flow_control.iterator import StepIterator from icolos.core.workflow_steps.prediction.active_learning import StepActiveLearning from icolos.utils.enums.step_enums import StepBaseEnum -from icolos.core.flow_control.iterator import StepIterator _SBE = StepBaseEnum @@ -10,6 +10,6 @@ class FlowControlInitializationEnum: # Keep these separate to the main pool of steps to avoid circular imports FLOW_CONTROL_INIT_DICT = { - _SBE.STEP_ITERATOR: StepIterator, _SBE.STEP_ACTIVE_LEARNING: StepActiveLearning, + _SBE.STEP_ITERATOR: StepIterator, } diff --git a/src/icolos/utils/enums/program_parameters.py b/src/icolos/utils/enums/program_parameters.py index e4ca1c1..0fe1813 100644 --- a/src/icolos/utils/enums/program_parameters.py +++ b/src/icolos/utils/enums/program_parameters.py @@ -348,6 +348,39 @@ def __setattr__(self, key, value): raise ValueError("No changes allowed.") +class GoldEnum: + + GOLD_CALL = "gold_auto" + GOLD_HELP = "-h" + GOLD_HELP_IDENTIFICATION_STRING = "Usage: gold_auto" + GOLD_QUIET = "-q" + + REMARK_TAG = "REMARK" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class GoldOutputEnum: + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + 
raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + class CrestOutputEnum: COORD = "coord" @@ -1247,11 +1280,15 @@ class GromacsEnum: LIG_ID = "lig_id.lig" LIG_EXT = "lig" ATOMS = ["HETATM", "ATOM"] + ATOMS_DIRECTIVE = "[ atoms ]" + BONDS = "[ bonds ]" ATOMTYPES = "[ atomtypes ]" MOLECULETYPES = "[ moleculetype ]" - MOLECULES = "[ molecules ]\n" + MOLECULES = "[ molecules ]" SOLVENTS = ["HOH ", "SOL", "WAT"] TERMINATIONS = ["ENDMDL", "END"] + SYSTEM = "[ system ]" + DEFAULTS = "[ defaults ]" def __getattr__(self, name): if name in self: @@ -1318,6 +1355,8 @@ class StepPMXEnum: ABFE = "abfe" RBFE = "rbfe" + STRICT = "strict" + class PMXAtomMappingEnum: diff --git a/src/icolos/utils/enums/step_enums.py b/src/icolos/utils/enums/step_enums.py index 525aad4..988f3bb 100644 --- a/src/icolos/utils/enums/step_enums.py +++ b/src/icolos/utils/enums/step_enums.py @@ -17,6 +17,7 @@ class StepBaseEnum(str, Enum): STEP_EMBEDDING = "EMBEDDING" STEP_PREDICTION = "PREDICTION" STEP_MODEL_BUILDING = "MODEL_BUILDING" + STEP_FEATURE_COUNTER = "FEATURE_COUNTER" STEP_BOLTZMANN_WEIGHTING = "BOLTZMANN_WEIGHTING" STEP_PKA_PREDICTION = "PKA_PREDICTION" STEP_PRIME = "PRIME" @@ -30,13 +31,11 @@ class StepBaseEnum(str, Enum): STEP_PANTHER = "PANTHER" STEP_SHAEP = "SHAEP" STEP_PDB2GMX = "PDB2GMX" - STEP_PDB2GMX_LIG = "PDB2GMX_LIG" STEP_EDITCONF = "EDITCONF" STEP_SOLVATE = "SOLVATE" STEP_GENION = "GENION" STEP_GROMPP = "GROMPP" STEP_MDRUN = "MDRUN" - STEP_FEATURE_COUNTER = "FEATURE_COUNTER" STEP_TRJCONV = "TRJCONV" STEP_TRJCAT = "TRJCAT" STEP_GMX_RMSD = "GMX_RMSD" @@ -46,6 +45,7 @@ class StepBaseEnum(str, Enum): STEP_GLIDE = "GLIDE" STEP_AUTODOCKVINA_DOCKING = "VINA_DOCKING" STEP_AUTODOCKVINA_TARGET_PREPARATION = "VINA_TARGET_PREPARATION" + STEP_GOLD_DOCKING = "GOLD_DOCKING" STEP_FEP_PLUS_SETUP = "FEP_PLUS_SETUP" STEP_FEP_PLUS_EXEC = "FEP_PLUS_EXEC" STEP_FEP_PLUS_ANALYSIS = "FEP_PLUS_ANALYSIS" @@ 
-401,6 +401,41 @@ def __setattr__(self, key, value): raise ValueError("No changes allowed.") +class StepGoldEnum: + + CONFIGURATION = "configuration" + GOLD_CONFIG_FILE = "gold_config_file" + BLOCK_INDENT = " " + CONFIGURATION_START = "GOLD CONFIGURATION FILE" + AUTOMATIC_SETTINGS = "AUTOMATIC SETTINGS" + AUTOSCALE = "autoscale" + POPULATION = "POPULATION" + GENETIC_OPERATORS = "GENETIC OPERATORS" + FLOOD_FILL = "FLOOD FILL" + CAVITY_FILE = "cavity_file" + DATA_FILES = "DATA FILES" + LIGAND_DATA_FILE = "ligand_data_file" + FLAGS = "FLAGS" + TERMINATION = "TERMINATION" + CONSTRAINTS = "CONSTRAINTS" + COVALENT_BONDING = "COVALENT BONDING" + SAVE_OPTIONS = "SAVE OPTIONS" + FITNESS_FUNCTION_SETTINGS = "FITNESS FUNCTION SETTINGS" + GOLD_FITFUNC_PATH = "gold_fitfunc_path" + PROTEIN_DATA = "PROTEIN DATA" + PROTEIN_DATAFILE = "protein_datafile" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + class StepModelBuilderEnum: # configuration fields DATA = "data" @@ -700,16 +735,16 @@ class StepGromacsEnum: STD_TOPOL = "topol.top" STD_TPR = "structure.tpr" STD_XTC = "structure.xtc" - STD_STRUCTURE = "structure.gro" + STD_STRUCTURE = "confout.gro" POSRE_LIG = "posre_lig.itp" CHARGE_METHOD = "charge_method" FORCE_CONSTANTS = "1000 1000 1000" LIG_ID = "lig_id" COUPLING_GROUP = "Other" MMPBSA_IN = "mmpbsa.in" + THREADS = "threads" GROMACS_LOAD = "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" AMBERTOOLS_PREFIX = "ambertools_prefix" - AMBERTOOLS_LOAD = "module load AmberTools/21-fosscuda-2019a-Python-3.7.2" WATER_AND_IONS = "Water_and_ions" PROTEIN_OTHER = "Protein_Other" SIM_COMPLETE = "Finished mdrun" @@ -719,6 +754,15 @@ class StepGromacsEnum: LENGTHS = "lengths" COUPLING_GROUPS = "coupling_groups" DEFAULT_MMPBSA_IN = 
"src/icolos/config/amber/default_mmpbsa.in" + PARAM_METHOD = "param_method" + GAFF = "gaff" + OPENFF = "openff" + WATER_POSRE = """ +#ifdef POSRES_WATER +[ position_restraints ] +; i funct fcx fcy fcz + 1 1 1000 1000 1000 +#endif\n""" def __getattr__(self, name): if name in self: @@ -730,6 +774,14 @@ def __setattr__(self, key, value): raise ValueError("No changes allowed.") +class StepOpenFFEnum: + UNIQUE_MOLS = "unique_molecules" + METHOD = "method" + PARMED = "parmed" + INTERCHANGE = "interchange" + FORCEFIELD = "off_forcefield" + + class StepCavExploreEnum: FIELD_KEY_DTR = "dtr" FIELD_KEY_CMS = "cms" @@ -912,6 +964,19 @@ def __setattr__(self, key, value): raise ValueError("No changes allowed.") +class StepGoldTargetPreparationEnum: + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + class StepActiveLearningEnum: ORACLE_CONFIG = "oracle_config" diff --git a/src/icolos/utils/enums/step_initialization_enum.py b/src/icolos/utils/enums/step_initialization_enum.py index 587945b..8343bad 100644 --- a/src/icolos/utils/enums/step_initialization_enum.py +++ b/src/icolos/utils/enums/step_initialization_enum.py @@ -3,6 +3,7 @@ from icolos.core.workflow_steps.autodockvina.target_preparation import ( StepAutoDockVinaTargetPreparation, ) +from icolos.core.workflow_steps.ccdc.docking import StepGold from icolos.core.workflow_steps.calculation.electrostatics.esp_sim import StepEspSim from icolos.core.workflow_steps.calculation.feature_counter import StepFeatureCounter from icolos.core.workflow_steps.gromacs.do_dssp import StepGMXDoDSSP @@ -104,6 +105,7 @@ class StepInitializationEnum: _SBE.STEP_FEATURE_COUNTER: StepFeatureCounter, _SBE.STEP_AUTODOCKVINA_DOCKING: StepAutoDockVina, _SBE.STEP_AUTODOCKVINA_TARGET_PREPARATION: StepAutoDockVinaTargetPreparation, + 
_SBE.STEP_GOLD_DOCKING: StepGold, _SBE.STEP_PMX_RUN_SIMULATIONS: StepPMXRunSimulations, _SBE.STEP_DISPATCHER: StepDispatcher, _SBE.STEP_ESP_SIM: StepEspSim, diff --git a/src/icolos/utils/execute_external/gold.py b/src/icolos/utils/execute_external/gold.py new file mode 100644 index 0000000..303b1ac --- /dev/null +++ b/src/icolos/utils/execute_external/gold.py @@ -0,0 +1,41 @@ +from icolos.utils.enums.program_parameters import GoldEnum +from icolos.utils.execute_external.execute import ExecutorBase + +_EE = GoldEnum() + + +class GoldExecutor(ExecutorBase): + """For the execution of CCDC's GOLD.""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check, whether a proper executable is provided + if command not in [_EE.GOLD_CALL]: + raise ValueError( + "Parameter command must be in the internal GOLD executable list."
+ ) + + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute( + command=_EE.GOLD_CALL, arguments=[_EE.GOLD_HELP], check=True + ) + if result.returncode == 0: + return True + return False + except Exception: + return False diff --git a/src/icolos/utils/execute_external/slurm_executor.py b/src/icolos/utils/execute_external/slurm_executor.py index eac6674..dabd77a 100644 --- a/src/icolos/utils/execute_external/slurm_executor.py +++ b/src/icolos/utils/execute_external/slurm_executor.py @@ -25,9 +25,7 @@ def __init__( prefix_execution=None, binary_location=None, ): - super().__init__( - prefix_execution=prefix_execution, binary_location=binary_location - ) + super().__init__(prefix_execution=None, binary_location=None) self.cores = cores self.partition = partition @@ -36,6 +34,8 @@ def __init__( self.modules = modules self.other_args = other_args self.gres = gres + self._script_prefix_execution = prefix_execution + self._script_binary_location = binary_location def execute( self, @@ -119,12 +119,12 @@ def _prepare_command( command = pipe_input + " | " + command # check, if command (binary) is to be found at a specific location (rather than in $PATH) - if self._binary_location is not None: - command = os.path.join(self._binary_location, command) + if self._script_binary_location is not None: + command = os.path.join(self._script_binary_location, command) # check, if the something needs to be added before the execution of the "rDock" command - if self._prefix_execution is not None: + if self._script_prefix_execution is not None: - command = self._prefix_execution + " && " + command + command = self._script_prefix_execution + " && " + command # execute; if "location" is set, change to this directory and execute there complete_command = command + " " + " ".join(str(e) for e in arguments) @@ -198,7 +198,8 @@ def _construct_slurm_header(self): f"#SBATCH -p {self.partition}", f"#SBATCH 
--time={self.time}", ] - header.append(f"#SBATCH --gres={self.gres}") + if self.gres is not None: + header.append(f"#SBATCH --gres={self.gres}") for key, value in self.other_args.items(): header.append(f"#SBATCH {key}={value}") diff --git a/src/icolos/utils/general/citation_generator.py b/src/icolos/utils/general/citation_generator.py new file mode 100644 index 0000000..42718e6 --- /dev/null +++ b/src/icolos/utils/general/citation_generator.py @@ -0,0 +1,54 @@ +import os +from typing import List + + +from icolos.core.workflow_steps.step import StepBase + +def print_citations(steps: List[StepBase], logger = None): + header = """ +===================================================================== +ooooo .oooooo. .oooooo. ooooo .oooooo. .oooooo..o +`888' d8P' `Y8b d8P' `Y8b `888' d8P' `Y8b d8P' `Y8 + 888 888 888 888 888 888 888 Y88bo. + 888 888 888 888 888 888 888 `"Y8888o. + 888 888 888 888 888 888 888 `"Y88b + 888 `88b ooo `88b d88' 888 o `88b d88' oo .d8P +o888o `Y8bood8P' `Y8bood8P' o888ooooood8 `Y8bood8P' 8""88888P' +=====================================================================\n""" + writeout_lines = header.split('\n') + writeout_lines.append("If you publish work using Icolos, please consider citing the following papers, based on the workflow's steps...\n\n") + citations = [] + for step in steps: + if "gmx" in step.type: + citations.append("\nGROMACS: https://doi.org/10.1016/j.softx.2015.06.001\nH.J.C. Berendsen, D. van der Spoel, and R. van Drunen, “GROMACS: A message-passing parallel molecular dynamics implementation,” Comp. Phys. Comm., 91 43–56 (1995)\nD. van der Spoel, E. Lindahl, B. Hess, G. Groenhof, A.E. Mark, and H.J.C. Berendsen, “GROMACS: Fast, Flexible and Free,” J. Comp. Chem., 26 1701–1718 (2005)\nH. Bekker, H.J.C. Berendsen, E.J. Dijkstra, S. Achterop, R. van Drunen, D. van der Spoel, A. Sijbers, and H. Keegstra et al., “Gromacs: A parallel computer for molecular dynamics simulations”; pp. 252–256 in Physics computing 92. Edited by R.A. 
de Groot and J. Nadrchal. World Scientific, Singapore, 1993") + if "pmx" in step.type: + citations.append("\nPMX: Vytautas Gapsys, Servaas Michielssens, Daniel Seeliger and Bert L. de Groot. Accurate and Rigorous Large Scale Mutation Free Energy Prediction. Angewandte Chemie Int. Ed. 55: 7364-7368(2016)\nVytautas Gapsys, Servaas Michielssens, Daniel Seeliger, and Bert L. de Groot. pmx: Automated protein structure and topology generation for alchemical perturbations. J. Comput. Chem. 36:348-354 (2015\n") + if "MMPBSA" in step.type: + citations.append("\ngmx_MMPBSA: Valdés-Tresanco, M.S., Valdés-Tresanco, M.E., Valiente, P.A. and Moreno E. gmx_MMPBSA: A New Tool to Perform End-State Free Energy Calculations with GROMACS. Journal of Chemical Theory and Computation, 2021 17 (10), 6281-6291\n") + if "turbomole" in step.type: + citations.append("\nTURBOMOLE: Modular program suite for ab initio quantum-chemical and condensed-matter simulations; J. Chem. Phys. 152, 184107 (2020); https://doi.org/10.1063/5.0004635\n") + if "espsim" in step.type: + citations.append("\nESPSim: https://doi.org/10.26434/chemrxiv-2021-sqvv9-v3\n") + if "vina" in step.type: + citations.append("\nAutoDock Vina: Eberhardt, J., Santos-Martins, D., Tillack, A.F., Forli, S. (2021). AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings. Journal of Chemical Information and Modeling.\nTrott, O., & Olson, A. J. (2010). AutoDock Vina: improving the speed and accuracy of docking with a new scoring function, efficient optimization, and multithreading. 
Journal of computational chemistry, 31(2), 455-461.\n") + + citations = list(set(citations)) + for c in citations: + writeout_lines.append(c) + writeout_lines.append("\n") + writeout_lines.append("=====================================================================\n") + + + + for line in writeout_lines: + try: + width = os.get_terminal_size().columns + except: + width= 150 + + print(line.center(width)) + + + + + \ No newline at end of file diff --git a/tests/CREST/test_CREST_confgen.py b/tests/CREST/test_CREST_confgen.py index 512834e..a5a4d4c 100644 --- a/tests/CREST/test_CREST_confgen.py +++ b/tests/CREST/test_CREST_confgen.py @@ -72,7 +72,7 @@ def test_coordinate_generation_neutral(self): out_path = os.path.join(self._test_dir, "CREST_conformers_paracetamol.sdf") crest_step.write_conformers(out_path) stat_inf = os.stat(out_path) - self.assertGreater(stat_inf.st_size, 32000) + self.assertGreater(stat_inf.st_size, 28000) def test_coordinate_generation_charged(self): step_conf = { diff --git a/tests/Turbomole/test_Turbomole.py b/tests/Turbomole/test_Turbomole.py index 1f4fb1f..4aebc1d 100644 --- a/tests/Turbomole/test_Turbomole.py +++ b/tests/Turbomole/test_Turbomole.py @@ -80,7 +80,7 @@ def test_Turbomole_run_ridft_single_core(self): .GetConformer(0) .GetPositions()[0] ), - [5.3347, 12.9328, 24.6745], + [0.8785, 0.6004, -0.2173], ) tm_step.execute() self.assertListEqual( @@ -150,7 +150,7 @@ def test_Turbomole_run_ridft_dual_core(self): .GetConformer(0) .GetPositions()[0] ), - [0.8785, 0.6004, -0.2173], + [5.3347, 12.9328, 24.6745], ) t1 = time.time() tm_step.execute() @@ -237,7 +237,7 @@ def test_Turbomole_run_jobex(self): .GetConformer(0) .GetPositions()[0] ), - [-2.1919, -3.3229, 0.3518], + [-0.7887, -0.0618, 0.1129], ) cosmofile = tm_step.get_compounds()[0][0][0].get_extra_data()[ _COE.EXTRA_DATA_COSMOFILE diff --git a/tests/XTB/test_XTB_confgen.py b/tests/XTB/test_XTB_confgen.py index 412fd5f..755e375 100644 --- a/tests/XTB/test_XTB_confgen.py +++ 
b/tests/XTB/test_XTB_confgen.py @@ -76,7 +76,7 @@ def test_coordinate_generation(self): .GetConformer(0) .GetPositions()[0] ), - [0.8785, 0.6004, -0.2173], + [5.3347, 12.9328, 24.6745], ) xtb_step.execute() self.assertListEqual( @@ -149,7 +149,7 @@ def test_single_core_execution(self): ) xtb_step.write_conformers(out_path) stat_inf = os.stat(out_path) - self.assertEqual(stat_inf.st_size, 6874) + self.assertEqual(stat_inf.st_size, 3967) def test_parallel_execution(self): step_conf = { @@ -204,4 +204,4 @@ def test_parallel_execution(self): ) xtb_step.write_conformers(out_path) stat_inf = os.stat(out_path) - self.assertEqual(stat_inf.st_size, 6874) + self.assertEqual(stat_inf.st_size, 3967) diff --git a/tests/ccdc/__init__.py b/tests/ccdc/__init__.py new file mode 100644 index 0000000..638ae74 --- /dev/null +++ b/tests/ccdc/__init__.py @@ -0,0 +1 @@ +from tests.ccdc.test_gold_docking import * diff --git a/tests/ccdc/test_gold_docking.py b/tests/ccdc/test_gold_docking.py new file mode 100644 index 0000000..7a1f4a7 --- /dev/null +++ b/tests/ccdc/test_gold_docking.py @@ -0,0 +1,157 @@ +import os +import unittest + +from icolos.core.workflow_steps.ccdc.docking import StepGold + +from icolos.utils.enums.step_enums import StepBaseEnum, StepGoldEnum + +from tests.tests_paths import ( + get_1UYD_ligands_as_Compounds, + PATHS_1UYD, + PATHS_EXAMPLEDATA +) +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SGE = StepGoldEnum() + + +class Test_Gold_docking(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/Gold") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + self._1UYD_compounds = get_1UYD_ligands_as_Compounds( + abs_path=PATHS_1UYD.LIGANDS + )[:3] + self.cavity_file = attach_root_path(os.path.join("../IcolosData", PATHS_1UYD.GOLD_CAVITY_MOL2)) + self.receptor_path = attach_root_path(os.path.join("../IcolosData", PATHS_1UYD.GOLD_MOL2_PROTEIN)) + 
self.config_file_path = attach_root_path(os.path.join("../IcolosData", PATHS_EXAMPLEDATA.GOLD_EXAMPLE_CONFIG)) + + def test_config_generation(self): + step_conf = { + _SBE.STEPID: "01_Gold", + _SBE.STEP_TYPE: _SBE.STEP_GOLD_DOCKING, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load ccdc" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _SGE.AUTOMATIC_SETTINGS: { + _SGE.AUTOSCALE: 0.5 + }, + _SGE.FLOOD_FILL: { + _SGE.CAVITY_FILE: "whatever" + }, + _SGE.PROTEIN_DATA: { + _SGE.PROTEIN_DATAFILE: "string" + } + } + }, + }, + } + + gold_step = StepGold(**step_conf) + + # check the two mandatory settings and one "overwrite" + self.assertEqual(gold_step.gold_additional.configuration.flood_fill.cavity_file, "whatever") + self.assertEqual(gold_step.gold_additional.configuration.protein_data.protein_datafile, "string") + self.assertEqual(gold_step.gold_additional.configuration.automatic_settings.autoscale, 0.5) + + # check config file size + config_path = os.path.join(self._test_dir, "test_config.cfg") + gold_step.generate_config_file(config_path, + ["/a/path/to/an/SDF/with/a/compound.sdf", + "/another/path.sdf"]) + stat_inf = os.stat(config_path) + self.assertGreater(stat_inf.st_size, 1400) + + def test_docking_default_config(self): + step_conf = { + _SBE.STEPID: "01_Gold", + _SBE.STEP_TYPE: _SBE.STEP_GOLD_DOCKING, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load ccdc" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _SGE.AUTOMATIC_SETTINGS: { + _SGE.AUTOSCALE: 0.5 + }, + _SGE.FLOOD_FILL: { + _SGE.CAVITY_FILE: self.cavity_file + }, + _SGE.PROTEIN_DATA: { + _SGE.PROTEIN_DATAFILE: self.receptor_path + } + } + }, + }, + } + + gold_step = StepGold(**step_conf) + 
gold_step.data.compounds = self._1UYD_compounds + gold_step.execute() + + self.assertEqual(len(gold_step.get_compounds()), 3) + self.assertEqual(len(list( + gold_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + )), 3) + + # check SDF write-out + out_path = os.path.join(self._test_dir, "gold_docked.sdf") + gold_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 50000) + + def test_docking_with_config_file(self): + step_conf = { + _SBE.STEPID: "01_Gold", + _SBE.STEP_TYPE: _SBE.STEP_GOLD_DOCKING, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load ccdc" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.GOLD_CONFIG_FILE: self.config_file_path + }, + }, + } + + gold_step = StepGold(**step_conf) + + # check config file size + config_path = os.path.join(self._test_dir, "test_config_file.cfg") + gold_step.generate_config_file(config_path, + ["/another/file1/whatever_1.sdf", + "/another/file1/whatever_111.sdf", + "/another/file1/whatever_1982734.sdf", + "/another/file1/whatever_1356638975934.sdf", + "/another/file1/whatever_33331.sdf", + "/another/file1/whatever_14642.sdf", + "/another/file1/whatever_1098604.sdf", + "/another/file1/whatever_2563241.sdf", + "/another/file1/whatever_1999374.sdf"]) + stat_inf = os.stat(config_path) + self.assertGreater(stat_inf.st_size, 1400) + diff --git a/tests/composite_agents/test_workflow.py b/tests/composite_agents/test_workflow.py index 37abeb1..20e4255 100644 --- a/tests/composite_agents/test_workflow.py +++ b/tests/composite_agents/test_workflow.py @@ -276,7 +276,7 @@ def test_workflow_execution(self): out_path = os.path.join(self._test_dir, "02a_omega_confgen.sdf") wflow[2].write_conformers(out_path) stat_inf = os.stat(out_path) - self.assertGreaterEqual(stat_inf.st_size, 4252) + 
self.assertGreaterEqual(stat_inf.st_size, 1800) out_path = os.path.join(self._test_dir, "02b_crest_confgen.sdf") wflow[3].write_conformers(out_path) stat_inf = os.stat(out_path) diff --git a/tests/flow_control/test_iterator.py b/tests/flow_control/test_iterator.py index 8871bc8..ed42d1e 100644 --- a/tests/flow_control/test_iterator.py +++ b/tests/flow_control/test_iterator.py @@ -1,7 +1,12 @@ import unittest from icolos.core.flow_control.iterator import StepIterator +from icolos.core.step_dispatch.dispatcher import StepDispatcher from icolos.core.workflow_steps.step import StepBase -from icolos.utils.enums.step_enums import StepBaseEnum, StepTurbomoleEnum +from icolos.utils.enums.step_enums import ( + StepBaseEnum, + StepGromacsEnum, + StepTurbomoleEnum, +) from icolos.utils.general.files_paths import attach_root_path from icolos.utils.enums.program_parameters import TurbomoleEnum import os @@ -13,6 +18,7 @@ _SBE = StepBaseEnum _TE = TurbomoleEnum() _STE = StepTurbomoleEnum() +_SGE = StepGromacsEnum() class TestIterator(unittest.TestCase): @@ -26,84 +32,167 @@ def setUp(self) -> None: with open(PATHS_EXAMPLEDATA.GROMACS_HOLO_STRUCTURE_GRO, "r") as f: self.structure = f.read() - with open(PATHS_EXAMPLEDATA.GROMACS_1BVG_TOP, "r") as f: - self.topol = f.read() + # def test_single_initialization(self): - with open(PATHS_EXAMPLEDATA.GROMACS_1BVG_TPR, "rb") as f: - self.tpr_file = f.read() + # full_conf = { + # "base_config": [ + # { + # _SBE.STEPID: "01_turbomole", + # _SBE.STEP_TYPE: _SBE.STEP_TURBOMOLE, + # _SBE.EXEC: { + # _SBE.EXEC_PREFIXEXECUTION: "module load turbomole/73", + # _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 1}, + # }, + # _SBE.SETTINGS: { + # _SBE.SETTINGS_ARGUMENTS: { + # _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + # _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + # }, + # _SBE.SETTINGS_ADDITIONAL: { + # _TE.TM_CONFIG_DIR: MAIN_CONFIG["TURBOMOLE_CONFIG"], + # _TE.TM_CONFIG_COSMO: os.path.join( + # MAIN_CONFIG["TURBOMOLE_CONFIG"], 
"cosmoprep_eps80.tm" + # ), + # _STE.EXECUTION_MODE: _TE.TM_RIDFT, + # }, + # }, + # } + # ], + # "iter_settings": { + # "settings": { + # "01_turbomole": { + # _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + # _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + # _SBE.SETTINGS_ADDITIONAL: { + # _TE.TM_CONFIG_BASENAME: [ + # "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", + # "blyp-ri-d3-def2-svp-int-coarse-charge2", + # "some_other_spicy_functional", + # ] + # }, + # } + # }, + # "iter_mode": "single", + # "n_iters": 3, # for now this is manual, should match the number of settings to iterate over + # }, + # } - with open(PATHS_EXAMPLEDATA.GROMACS_1BVG_XTC, "rb") as f: - self.xtc_file = f.read() + # step_iterator = StepIterator(**full_conf) + # self.assertEqual(len(step_iterator.initialized_steps), 3) + # for i in step_iterator.initialized_steps: + # assert isinstance(i, StepBase) - with open(PATHS_EXAMPLEDATA.MMPBSA_POSRE, "r") as f: - self.posre = f.read() + # def test_multi_iter_initialization(self): - with open(PATHS_EXAMPLEDATA.MMPBSA_LIG_ITP, "r") as f: - self.lig_itp = f.read() + # full_conf = { + # "base_config": [ + # { + # _SBE.STEPID: "01_turbomole", + # _SBE.STEP_TYPE: _SBE.STEP_TURBOMOLE, + # _SBE.EXEC: { + # _SBE.EXEC_PREFIXEXECUTION: "module load turbomole/73", + # _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 1}, + # }, + # _SBE.SETTINGS: { + # _SBE.SETTINGS_ARGUMENTS: { + # _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + # _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + # }, + # _SBE.SETTINGS_ADDITIONAL: { + # _TE.TM_CONFIG_DIR: MAIN_CONFIG["TURBOMOLE_CONFIG"], + # _TE.TM_CONFIG_COSMO: os.path.join( + # MAIN_CONFIG["TURBOMOLE_CONFIG"], "cosmoprep_eps80.tm" + # ), + # _TE.TM_CONFIG_BASENAME: "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", + # _STE.EXECUTION_MODE: _TE.TM_RIDFT, + # }, + # }, + # } + # ], + # "iter_settings": { + # # no changes in config, just run the same step through multiple times + # "iter_mode": "n_iters", + # "n_iters": 5, # for now this is manual, should 
match the number of settings to iterate over + # }, + # } - with open(PATHS_EXAMPLEDATA.MMPBSA_LIG_POSRE, "r") as f: - self.lig_posre = f.read() + # step_iterator = StepIterator(**full_conf) + # self.assertEqual(len(step_iterator.initialized_steps), 5) + # for i in step_iterator.initialized_steps: + # assert isinstance(i, StepBase) - def test_single_initialization(self): + def test_n_iters_4_cores_gromacs(self): + """ + Initialize 4 pdb2gmx jobs in separate workflows, + """ full_conf = { "base_config": [ { - _SBE.STEPID: "01_turbomole", - _SBE.STEP_TYPE: _SBE.STEP_TURBOMOLE, + _SBE.STEPID: "01_pdb2gmx", + _SBE.STEP_TYPE: "pdb2gmx", _SBE.EXEC: { - _SBE.EXEC_PREFIXEXECUTION: "module load turbomole/73", - _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 1}, + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" }, _SBE.SETTINGS: { _SBE.SETTINGS_ARGUMENTS: { - _SBE.SETTINGS_ARGUMENTS_FLAGS: [], - _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-ignh"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-water": "tip3p", + "-ff": "amber03", + }, }, - _SBE.SETTINGS_ADDITIONAL: { - _TE.TM_CONFIG_DIR: MAIN_CONFIG["TURBOMOLE_CONFIG"], - _TE.TM_CONFIG_COSMO: os.path.join( - MAIN_CONFIG["TURBOMOLE_CONFIG"], "cosmoprep_eps80.tm" - ), - _STE.EXECUTION_MODE: _TE.TM_RIDFT, + _SBE.SETTINGS_ADDITIONAL: {_SGE.CHARGE_METHOD: "gas"}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: attach_root_path( + PATHS_EXAMPLEDATA.GROMACS_HOLO_STRUCTURE + ), + _SBE.INPUT_EXTENSION: "pdb", + } + ] + }, + }, + { + _SBE.STEPID: "02_editconf", + _SBE.STEP_TYPE: "editconf", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-d": "1.2", + "-bt": "dodecahedron", + }, }, + _SBE.SETTINGS_ADDITIONAL: {}, }, - } - ], - "iter_settings": { - "settings": { - "01_turbomole": 
{ - _SBE.SETTINGS_ARGUMENTS_FLAGS: [], - _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, - _SBE.SETTINGS_ADDITIONAL: { - _TE.TM_CONFIG_BASENAME: [ - "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", - "blyp-ri-d3-def2-svp-int-coarse-charge2", - "some_other_spicy_functional", - ] + _SBE.INPUT: {}, + }, + { + _SBE.STEPID: "03_solvate", + _SBE.STEP_TYPE: "solvate", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-cs": "spc216"}, }, - } + _SBE.SETTINGS_ADDITIONAL: {}, + }, + _SBE.INPUT: {_SBE.INPUT_GENERIC: []}, }, - "iter_mode": "single", - "n_iters": 3, # for now this is manual, should match the number of settings to iterate over - }, - } - - step_iterator = StepIterator(**full_conf) - self.assertEqual(len(step_iterator.initialized_steps), 3) - for i in step_iterator.initialized_steps: - assert isinstance(i, StepBase) - - def test_multi_iter_initialization(self): - - full_conf = { - "base_config": [ { - _SBE.STEPID: "01_turbomole", - _SBE.STEP_TYPE: _SBE.STEP_TURBOMOLE, + _SBE.STEPID: "04_grompp", + _SBE.STEP_TYPE: "grompp", _SBE.EXEC: { - _SBE.EXEC_PREFIXEXECUTION: "module load turbomole/73", - _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 1}, + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" }, _SBE.SETTINGS: { _SBE.SETTINGS_ARGUMENTS: { @@ -111,87 +200,73 @@ def test_multi_iter_initialization(self): _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, }, _SBE.SETTINGS_ADDITIONAL: { - _TE.TM_CONFIG_DIR: MAIN_CONFIG["TURBOMOLE_CONFIG"], - _TE.TM_CONFIG_COSMO: os.path.join( - MAIN_CONFIG["TURBOMOLE_CONFIG"], "cosmoprep_eps80.tm" - ), - _TE.TM_CONFIG_BASENAME: "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", - _STE.EXECUTION_MODE: _TE.TM_RIDFT, + "-r": False, }, }, - } - ], - "iter_settings": { - # no changes in config, just run the same step through multiple times - "iter_mode": "n_iters", - 
"n_iters": 5, # for now this is manual, should match the number of settings to iterate over - }, - } - - step_iterator = StepIterator(**full_conf) - self.assertEqual(len(step_iterator.initialized_steps), 5) - for i in step_iterator.initialized_steps: - assert isinstance(i, StepBase) - - def test_single_initialization_parallel_execution(self): - """ - Test running multiple steps in parallel - """ - - full_conf = { - "base_config": [ + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: os.path.join( + MAIN_CONFIG["ICOLOS_TEST_DATA"], + "gromacs/protein/ions.mdp", + ), + _SBE.INPUT_EXTENSION: "mdp", + }, + ] + }, + }, { - _SBE.STEPID: "test_mmgbsa", - _SBE.STEP_TYPE: _SBE.STEP_GMX_MMPBSA, + _SBE.STEPID: "05_genion", + _SBE.STEP_TYPE: "genion", _SBE.EXEC: { - _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2 && module load gmx_MMPBSA/1.3.3-fosscuda-2019a-Python-3.7.2" + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" }, _SBE.SETTINGS: { _SBE.SETTINGS_ARGUMENTS: { - _SBE.SETTINGS_ARGUMENTS_FLAGS: [], - _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-neutral"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-pname": "NA", + "-nname": "CL", + }, }, _SBE.SETTINGS_ADDITIONAL: { - "make_ndx_command": "Protein Other", - "pipe_input": "Protein Other", + "pipe_input": "SOL", }, }, - } + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "04_grompp", + _SBE.INPUT_EXTENSION: "tpr", + }, + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "top"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: f"{self._test_dir}/topol_out.top", + }, + } + ], + }, ], "iter_settings": { "n_iters": 4, # for now this is manual, should match the number of settings to iterate over "parallelizer_settings": { - "parallelize": True, "cores": 4, - "max_lenth_sublists": 1, }, }, } - step_mmpbsa_job_control = StepIterator(**full_conf) - # 
step_mmpbsa.data.generic.add_file( - # GenericData(file_name="structure.gro", file_data=self.structure) - # ) - # step_mmpbsa.data.generic.add_file( - # GenericData(file_name="topol.top", file_data=self.topol) - # ) - # step_mmpbsa.data.generic.add_file( - # GenericData(file_name="structure.xtc", file_data=self.xtc_file) - # ) - # step_mmpbsa.data.generic.add_file( - # GenericData(file_name="structure.tpr", file_data=self.tpr_file) - # ) - # step_mmpbsa.data.generic.add_file( - # GenericData(file_name="posre.itp", file_data=self.posre) - # ) - # step_mmpbsa.data.generic.add_file( - # GenericData(file_name="DMP:100.itp", file_data=self.lig_itp) - # ) - # step_mmpbsa.data.generic.add_file( - # GenericData(file_name="posre_DMP:100.itp", file_data=self.lig_posre) - # ) + step_gromacs_preprocess = StepIterator(**full_conf) # should return JobControl object - assert isinstance(step_mmpbsa_job_control.initialized_steps, StepBase) - # TODO: there isn't really a good way to unit test this, it is a pain to load the data in to the individual steps - # step_mmpbsa_job_control.initialized_steps.execute() + assert isinstance(step_gromacs_preprocess.dispatcher, StepDispatcher) + assert len(step_gromacs_preprocess.dispatcher.workflows) == 4 + + # instantiate the Dispatcher object + step_gromacs_preprocess.dispatcher.execute() + out_path = os.path.join(self._test_dir, "run_3", "topol_out_0.top") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 1560) diff --git a/tests/gromacs/test_editconf.py b/tests/gromacs/test_editconf.py index f5ba4c7..5fe1b5d 100644 --- a/tests/gromacs/test_editconf.py +++ b/tests/gromacs/test_editconf.py @@ -1,6 +1,8 @@ from icolos.core.containers.generic import GenericData import unittest import os +from icolos.core.containers.gromacs_topol import GromacsTopol +from icolos.core.composite_agents.workflow import WorkFlow from icolos.core.workflow_steps.gromacs.editconf import StepGMXEditConf from icolos.utils.enums.step_enums import 
StepBaseEnum, StepGromacsEnum from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars @@ -23,7 +25,9 @@ def setUp(self): with open( attach_root_path(PATHS_EXAMPLEDATA.GROMACS_HOLO_STRUCTURE_GRO), "r" ) as f: - self.structure = f.read() + struct = f.readlines() + self.topol = GromacsTopol() + self.topol.structure = struct def test_editconf_run(self): step_conf = { @@ -44,13 +48,13 @@ def test_editconf_run(self): } step_editconf = StepGMXEditConf(**step_conf) - step_editconf.data.generic.add_file( - GenericData( - file_name="structure.gro", file_data=self.structure, argument=True - ) - ) + + wf = WorkFlow() + wf.workflow_data.gmx_topol = self.topol + step_editconf.set_workflow_object(wf) + step_editconf.execute() - out_path = os.path.join(self._test_dir, "structure.gro") - step_editconf.write_generic_by_name(self._test_dir, "structure.gro") + out_path = os.path.join(self._test_dir, "confout.gro") + step_editconf.write_generic_by_name(self._test_dir, "confout.gro") stat_inf = os.stat(out_path) self.assertEqual(stat_inf.st_size, 2102964) diff --git a/tests/gromacs/test_genion.py b/tests/gromacs/test_genion.py index 1515507..1fa36a7 100644 --- a/tests/gromacs/test_genion.py +++ b/tests/gromacs/test_genion.py @@ -1,4 +1,6 @@ +from icolos.core.composite_agents.workflow import WorkFlow from icolos.core.containers.generic import GenericData +from icolos.core.containers.gromacs_topol import GromacsTopol from icolos.core.workflow_steps.gromacs.genion import StepGMXGenion import unittest import os @@ -21,9 +23,16 @@ def setUpClass(cls): def setUp(self): with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_1BVG_TOP), "r") as f: - self.topol = f.read() + topol = f.readlines() with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_1BVG_TPR), "rb") as f: self.tpr = f.read() + with open( + attach_root_path(PATHS_EXAMPLEDATA.GROMACS_HOLO_STRUCTURE_GRO), "r" + ) as f: + struct = f.readlines() + self.topol = GromacsTopol() + self.topol.structure = struct + 
self.topol.top_lines = topol def test_genion_run(self): step_conf = { @@ -45,15 +54,15 @@ def test_genion_run(self): } step_genion = StepGMXGenion(**step_conf) - step_genion.data.generic.add_file( - GenericData(file_name="topol.top", file_data=self.topol, argument=True) - ) step_genion.data.generic.add_file( GenericData(file_name="structure.tpr", file_data=self.tpr, argument=True) ) + wf = WorkFlow() + wf.workflow_data.gmx_topol = self.topol + step_genion.set_workflow_object(wf) step_genion.execute() - out_path = os.path.join(self._test_dir, "structure.gro") - step_genion.write_generic_by_name(self._test_dir, "structure.gro") + out_path = os.path.join(self._test_dir, "confout.gro") + step_genion.write_generic_by_name(self._test_dir, "confout.gro") stat_inf = os.stat(out_path) self.assertGreater(stat_inf.st_size, 2102900) diff --git a/tests/gromacs/test_grompp.py b/tests/gromacs/test_grompp.py index 1f9b320..2bd3380 100644 --- a/tests/gromacs/test_grompp.py +++ b/tests/gromacs/test_grompp.py @@ -1,6 +1,8 @@ +from icolos.core.composite_agents.workflow import WorkFlow from icolos.core.containers.generic import GenericData import unittest import os +from icolos.core.containers.gromacs_topol import GromacsTopol from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum from tests.tests_paths import MAIN_CONFIG, PATHS_EXAMPLEDATA, export_unit_test_env_vars from icolos.core.workflow_steps.gromacs.grompp import StepGMXGrompp @@ -20,14 +22,22 @@ def setUpClass(cls): export_unit_test_env_vars() def setUp(self): - with open(PATHS_EXAMPLEDATA.GROMACS_HOLO_STRUCTURE_GRO, "r") as f: - self.structure = f.read() + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_1BVG_TOP), "r") as f: + topol = f.readlines() + with open( + attach_root_path(PATHS_EXAMPLEDATA.GROMACS_HOLO_STRUCTURE_GRO), "r" + ) as f: + struct = f.readlines() with open(PATHS_EXAMPLEDATA.GROMACS_IONS_MDP, "r") as f: self.mdp = f.read() - with open(PATHS_EXAMPLEDATA.GROMACS_1BVG_TOP, "r") as f: - 
self.topol = f.read() - with open(PATHS_EXAMPLEDATA.MMPBSA_LIG_POSRE, "r") as f: - self.posre = f.read() + + self.topol = GromacsTopol() + self.topol.structure = struct + self.topol.top_lines = topol + self.topol.add_itp( + os.path.join(MAIN_CONFIG["ICOLOS_TEST_DATA"], "gromacs/protein"), + ["DMP:100.itp"], + ) def test_grompp(self): step_conf = { @@ -54,20 +64,13 @@ def test_grompp(self): } step_grompp = StepGMXGrompp(**step_conf) - step_grompp.data.generic.add_file( - GenericData( - file_name="tmp029389.gro", file_data=self.structure, argument=True - ) - ) + step_grompp.data.generic.add_file( GenericData(file_name="tmp03394.mdp", file_data=self.mdp, argument=True) ) - step_grompp.data.generic.add_file( - GenericData(file_name="tmp91023.top", file_data=self.topol, argument=True) - ) - step_grompp.data.generic.add_file( - GenericData(file_name="DMP:100.itp", file_data=self.posre, argument=True) - ) + wf = WorkFlow() + wf.workflow_data.gmx_topol = self.topol + step_grompp.set_workflow_object(wf) step_grompp.execute() diff --git a/tests/gromacs/test_mdrun.py b/tests/gromacs/test_mdrun.py index bde752f..f69711a 100644 --- a/tests/gromacs/test_mdrun.py +++ b/tests/gromacs/test_mdrun.py @@ -1,6 +1,8 @@ +from icolos.core.composite_agents.workflow import WorkFlow from icolos.core.containers.generic import GenericData import unittest import os +from icolos.core.containers.gromacs_topol import GromacsTopol from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars from icolos.utils.general.files_paths import attach_root_path @@ -39,12 +41,17 @@ def test_mdrun(self): } step_mdrun = StepGMXMDrun(**step_conf) + topol = GromacsTopol() + step_mdrun = StepGMXMDrun(**step_conf) + wf = WorkFlow() + wf.workflow_data.gmx_topol = topol + step_mdrun.set_workflow_object(wf) step_mdrun.data.generic.add_file( GenericData(file_name="structure.tpr", file_data=self.tpr, argument=True) ) 
step_mdrun.execute() - out_path = os.path.join(self._test_dir, "structure.gro") + out_path = os.path.join(self._test_dir, "confout.gro") step_mdrun.write_generic_by_extension(self._test_dir, "gro") stat_inf = os.stat(out_path) self.assertGreater(stat_inf.st_size, 3224400) @@ -69,14 +76,17 @@ def test_mdrun_slurm(self): } }, } - step_mdrun = StepGMXMDrun(**step_conf) + topol = GromacsTopol() + step_mdrun = StepGMXMDrun(**step_conf) + wf = WorkFlow() + wf.workflow_data.gmx_topol = topol + step_mdrun.set_workflow_object(wf) step_mdrun.data.generic.add_file( GenericData(file_name="structure.tpr", file_data=self.tpr, argument=True) ) step_mdrun.execute() - - out_path = os.path.join(self._test_dir, "structure.gro") + out_path = os.path.join(self._test_dir, "confout.gro") step_mdrun.write_generic_by_extension(self._test_dir, "gro") stat_inf = os.stat(out_path) self.assertEqual(stat_inf.st_size, 3224484) diff --git a/tests/gromacs/test_mmpbsa.py b/tests/gromacs/test_mmpbsa.py index 7a91eff..f0430e0 100644 --- a/tests/gromacs/test_mmpbsa.py +++ b/tests/gromacs/test_mmpbsa.py @@ -1,3 +1,5 @@ +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.core.containers.gromacs_topol import GromacsTopol from icolos.core.workflow_steps.gromacs.mmpbsa import StepGMXmmpbsa from icolos.core.containers.generic import GenericData import unittest @@ -22,17 +24,13 @@ def setUpClass(cls) -> None: def setUp(self) -> None: with open(PATHS_EXAMPLEDATA.GROMACS_HOLO_STRUCTURE_GRO, "r") as f: - self.structure = f.read() - - with open(PATHS_EXAMPLEDATA.GROMACS_1BVG_TOP, "r") as f: - self.topol = f.read() + self.structure = f.readlines() with open(PATHS_EXAMPLEDATA.GROMACS_1BVG_TPR, "rb") as f: self.tpr_file = f.read() with open(PATHS_EXAMPLEDATA.GROMACS_1BVG_XTC, "rb") as f: self.xtc_file = f.read() - with open(PATHS_EXAMPLEDATA.MMPBSA_POSRE, "rb") as f: self.posre = f.read() @@ -42,12 +40,20 @@ def setUp(self) -> None: with open(PATHS_EXAMPLEDATA.MMPBSA_LIG_POSRE, "rb") as f: 
self.lig_posre = f.read() + self.topol = GromacsTopol() + self.topol.parse(PATHS_EXAMPLEDATA.GROMACS_1BVG_TOP) + self.topol.structure = self.structure + self.topol.add_itp( + os.path.join(MAIN_CONFIG["ICOLOS_TEST_DATA"], "gromacs/protein"), + ["DMP:100.itp"], + ) + def test_protein_lig_single_traj(self): step_conf = { _SBE.STEPID: "test_gmmpbsa", _SBE.STEP_TYPE: "gmx_mmpbsa", _SBE.EXEC: { - _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2 && module load gmx_MMPBSA && module load AmberTools/21-fosscuda-2019a-Python-3.7.2" + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2 && module load gmx_MMPBSA " }, _SBE.SETTINGS: { _SBE.SETTINGS_ARGUMENTS: { @@ -60,28 +66,20 @@ def test_protein_lig_single_traj(self): }, }, } + wf = WorkFlow() + wf.workflow_data.gmx_topol = self.topol + step_mmpbsa = StepGMXmmpbsa(**step_conf) - step_mmpbsa.data.generic.add_file( - GenericData(file_name="structure.gro", file_data=self.structure) - ) - step_mmpbsa.data.generic.add_file( - GenericData(file_name="topol.top", file_data=self.topol) - ) + step_mmpbsa.set_workflow_object(wf) step_mmpbsa.data.generic.add_file( GenericData(file_name="structure.xtc", file_data=self.xtc_file) ) step_mmpbsa.data.generic.add_file( GenericData(file_name="structure.tpr", file_data=self.tpr_file) ) - step_mmpbsa.data.generic.add_file( - GenericData(file_name="posre.itp", file_data=self.posre) - ) step_mmpbsa.data.generic.add_file( GenericData(file_name="DMP:100.itp", file_data=self.lig_itp) ) - step_mmpbsa.data.generic.add_file( - GenericData(file_name="posre_DMP:100.itp", file_data=self.lig_posre) - ) step_mmpbsa.execute() out_path = os.path.join(self._test_dir, "FINAL_RESULTS_MMPBSA.dat") step_mmpbsa.write_generic_by_extension(self._test_dir, "dat") @@ -94,7 +92,7 @@ def test_protein_lig_single_traj_custom_file(self): _SBE.STEPID: "test_gmmpbsa", _SBE.STEP_TYPE: "gmx_mmpbsa", _SBE.EXEC: { - _SBE.EXEC_PREFIXEXECUTION: 
"module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2 && module load gmx_MMPBSA && module load AmberTools/21-fosscuda-2019a-Python-3.7.2" + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2 && module load gmx_MMPBSA" }, _SBE.SETTINGS: { _SBE.SETTINGS_ARGUMENTS: { @@ -105,36 +103,24 @@ def test_protein_lig_single_traj_custom_file(self): _SGE.FORCEFIELD: MAIN_CONFIG["FORCEFIELD"], _SGE.COUPLING_GROUPS: "Protein Other", _SGE.INPUT_FILE: PATHS_EXAMPLEDATA.MMPBSA_CUSTOM_INPUT, - "ntasks": 2, }, }, } + wf = WorkFlow() + wf.workflow_data.gmx_topol = self.topol + step_mmpbsa = StepGMXmmpbsa(**step_conf) - step_mmpbsa.data.generic.add_file( - GenericData(file_name="structure.gro", file_data=self.structure) - ) - step_mmpbsa.data.generic.add_file( - GenericData(file_name="topol.top", file_data=self.topol) - ) + step_mmpbsa.set_workflow_object(wf) step_mmpbsa.data.generic.add_file( GenericData(file_name="structure.xtc", file_data=self.xtc_file) ) step_mmpbsa.data.generic.add_file( GenericData(file_name="structure.tpr", file_data=self.tpr_file) ) - step_mmpbsa.data.generic.add_file( - GenericData(file_name="posre.itp", file_data=self.posre) - ) step_mmpbsa.data.generic.add_file( GenericData(file_name="DMP:100.itp", file_data=self.lig_itp) ) - step_mmpbsa.data.generic.add_file( - GenericData(file_name="posre_DMP:100.itp", file_data=self.lig_posre) - ) - t1 = time() step_mmpbsa.execute() - exec_time = time() - t1 - print("single traj exec time, custom input", exec_time) out_path = os.path.join(self._test_dir, "FINAL_RESULTS_MMPBSA.dat") step_mmpbsa.write_generic_by_extension(self._test_dir, "dat") stat_inf = os.stat(out_path) diff --git a/tests/gromacs/test_pdb2gmx.py b/tests/gromacs/test_pdb2gmx.py index c9d81b7..f648001 100644 --- a/tests/gromacs/test_pdb2gmx.py +++ b/tests/gromacs/test_pdb2gmx.py @@ -1,21 +1,31 @@ +import shutil +from icolos.core.composite_agents.workflow import WorkFlow from 
icolos.core.containers.generic import GenericData import unittest import os -from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum -from tests.tests_paths import PATHS_1UYD, PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum, StepOpenFFEnum +from tests.tests_paths import ( + MAIN_CONFIG, + PATHS_1UYD, + PATHS_EXAMPLEDATA, + export_unit_test_env_vars, +) from icolos.utils.general.files_paths import attach_root_path from icolos.core.workflow_steps.gromacs.pdb2gmx import StepGMXPdb2gmx _SGE = StepGromacsEnum() _SBE = StepBaseEnum +_SOFE = StepOpenFFEnum() class Test_Pdb2gmx(unittest.TestCase): @classmethod def setUpClass(cls): cls._test_dir = attach_root_path("tests/junk/gromacs") - if not os.path.isdir(cls._test_dir): - os.makedirs(cls._test_dir) + + if os.path.isdir(cls._test_dir): + shutil.rmtree(cls._test_dir) + os.makedirs(cls._test_dir, exist_ok=True) export_unit_test_env_vars() @@ -24,6 +34,8 @@ def setUp(self): self.structure = f.read() with open(PATHS_EXAMPLEDATA.GROMACS_1BVG_PDB, "r") as f: self.holo_structure = f.read() + with open(PATHS_EXAMPLEDATA.GROMACS_DMP_LIGAND_SDF, "r") as f: + self.dmp_ligand = f.read() def test_pdb2gmx_run(self): step_conf = { @@ -45,20 +57,21 @@ def test_pdb2gmx_run(self): } step_pdb2gmx = StepGMXPdb2gmx(**step_conf) + step_pdb2gmx._workflow_object = WorkFlow() step_pdb2gmx.data.generic.add_file( GenericData( file_name="structure.pdb", file_data=self.structure, argument=True ) ) step_pdb2gmx.execute() - out_path = os.path.join(self._test_dir, "structure.gro") + out_path = os.path.join(self._test_dir, "confout.gro") step_pdb2gmx.write_generic_by_extension( self._test_dir, _SGE.FIELD_KEY_STRUCTURE ) stat_inf = os.stat(out_path) self.assertGreater(stat_inf.st_size, 22300) - def test_lig_param(self): + def test_lig_param_gaff(self): step_conf = { _SBE.STEPID: "test_pdb2gmx_lig_param", _SBE.STEP_TYPE: "pdb2gmx", @@ -79,15 +92,72 @@ def 
test_lig_param(self): } step_lig_param = StepGMXPdb2gmx(**step_conf) + step_lig_param._workflow_object = WorkFlow() step_lig_param.data.generic.add_file( GenericData( file_name="tmp_whatever01923.pdb", file_data=self.holo_structure ) ) step_lig_param.execute() - out_path = os.path.join(self._test_dir, "structure.gro") + out_path = os.path.join(self._test_dir, "confout.gro") step_lig_param.write_generic_by_extension( self._test_dir, _SGE.FIELD_KEY_STRUCTURE ) stat_inf = os.stat(out_path) self.assertGreater(stat_inf.st_size, 73800) + + # out_path = os.path.join(self._test_dir, "topol.top") + # step_lig_param.write_generic_by_extension(self._test_dir, _SGE.FIELD_KEY_TOPOL) + # stat_inf = os.stat(out_path) + # self.assertGreater(stat_inf.st_size, 73800) + + def test_lig_param_openff(self): + step_conf = { + _SBE.STEPID: "test_pdb2gmx_lig_param", + _SBE.STEP_TYPE: "pdb2gmx", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: {}, + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-ignh"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-water": "tip4p", + "-ff": "amber03", + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CHARGE_METHOD: "gas", + _SGE.PARAM_METHOD: _SGE.OPENFF, + _SOFE.FORCEFIELD: os.path.join( + MAIN_CONFIG["OPENFF_FORCEFIELDS"], "openff-2.0.0.offxml" + ), + "lig_pdb": PATHS_1UYD.NATIVE_LIGAND_PDB, + _SOFE.UNIQUE_MOLS: "COc1c(OC)cc(c(Cl)c1OC)Cc(n2)n(CCCC)c(c23)ncnc3N", + }, + }, + } + + step_pdb2gmx = StepGMXPdb2gmx(**step_conf) + step_pdb2gmx._workflow_object = WorkFlow() + step_pdb2gmx.data.generic.add_file( + GenericData( + file_name="tmp_whatever01923.pdb", file_data=self.holo_structure + ) + ) + step_pdb2gmx.data.generic.add_file( + GenericData(file_name="DMP.sdf", file_data=self.dmp_ligand, argument=True) + ) + step_pdb2gmx.execute() + out_path = os.path.join(self._test_dir, "confout.gro") + step_pdb2gmx.write_generic_by_extension( 
+ self._test_dir, _SGE.FIELD_KEY_STRUCTURE + ) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 73800) + + # out_path = os.path.join(self._test_dir, "topol.top") + # step_lig_param.write_generic_by_extension(self._test_dir, _SGE.FIELD_KEY_TOPOL) + # stat_inf = os.stat(out_path) + # self.assertGreater(stat_inf.st_size, 73800) diff --git a/tests/gromacs/test_solvate.py b/tests/gromacs/test_solvate.py index 184aec2..c4147f4 100644 --- a/tests/gromacs/test_solvate.py +++ b/tests/gromacs/test_solvate.py @@ -1,6 +1,8 @@ +from icolos.core.composite_agents.workflow import WorkFlow from icolos.core.containers.generic import GenericData import unittest import os +from icolos.core.containers.gromacs_topol import GromacsTopol from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars from icolos.utils.general.files_paths import attach_root_path @@ -20,10 +22,12 @@ def setUpClass(cls): export_unit_test_env_vars() def setUp(self): - with open(PATHS_EXAMPLEDATA.GROMACS_1BVG_TOP, "r") as f: - self.topol = f.read() with open(PATHS_EXAMPLEDATA.GROMACS_HOLO_STRUCTURE_GRO, "r") as f: - self.structure = f.read() + self.structure = f.readlines() + + self.topol = GromacsTopol() + self.topol.parse(PATHS_EXAMPLEDATA.GROMACS_1BVG_TOP) + self.topol.structure = self.structure def test_solvate(self): step_conf = { @@ -41,18 +45,12 @@ def test_solvate(self): } step_solvate = StepGMXSolvate(**step_conf) - step_solvate.data.generic.add_file( - GenericData( - file_name="structure.gro", file_data=self.structure, argument=True - ) - ) - step_solvate.data.generic.add_file( - GenericData(file_name="topol.top", file_data=self.topol, argument=True) - ) - + wf = WorkFlow() + wf.workflow_data.gmx_topol = self.topol + step_solvate.set_workflow_object(wf) step_solvate.execute() - out_path = os.path.join(self._test_dir, "structure.gro") + out_path = os.path.join(self._test_dir, "confout.gro") 
step_solvate.write_generic_by_extension( self._test_dir, _SGE.FIELD_KEY_STRUCTURE ) diff --git a/tests/integration_tests/test_gromacs.py b/tests/integration_tests/test_gromacs.py index 5e05c3b..480f71e 100644 --- a/tests/integration_tests/test_gromacs.py +++ b/tests/integration_tests/test_gromacs.py @@ -16,7 +16,7 @@ _SGE = StepGromacsEnum() -class Test_MD_Fpocket(unittest.TestCase): +class Test_GROMACS_MD(unittest.TestCase): @classmethod def setUpClass(cls): cls._test_dir = attach_root_path("tests/junk/integration") @@ -122,18 +122,7 @@ def test_workflow_MD_fpocket_holo(self): }, _SBE.SETTINGS_ADDITIONAL: {}, }, - _SBE.INPUT: { - _SBE.INPUT_GENERIC: [ - { - _SBE.INPUT_SOURCE: "02_editconf", - _SBE.INPUT_EXTENSION: "gro", - }, - { - _SBE.INPUT_SOURCE: "01_pdb2gmx", - _SBE.INPUT_EXTENSION: "top", - }, - ] - }, + _SBE.INPUT: {_SBE.INPUT_GENERIC: []}, }, { _SBE.STEPID: "04_grompp", @@ -152,22 +141,10 @@ def test_workflow_MD_fpocket_holo(self): }, _SBE.INPUT: { _SBE.INPUT_GENERIC: [ - { - _SBE.INPUT_SOURCE: "03_solvate", - _SBE.INPUT_EXTENSION: "gro", - }, { _SBE.INPUT_SOURCE: "{file_base}/ions.mdp", _SBE.INPUT_EXTENSION: "mdp", - }, - { - _SBE.INPUT_SOURCE: "03_solvate", - _SBE.INPUT_EXTENSION: "top", - }, - { - _SBE.INPUT_SOURCE: "01_pdb2gmx", - _SBE.INPUT_EXTENSION: "itp", - }, + } ] }, }, @@ -194,11 +171,7 @@ def test_workflow_MD_fpocket_holo(self): { _SBE.INPUT_SOURCE: "04_grompp", _SBE.INPUT_EXTENSION: "tpr", - }, - { - _SBE.INPUT_SOURCE: "04_grompp", - _SBE.INPUT_EXTENSION: "top", - }, + } ] }, }, @@ -219,22 +192,10 @@ def test_workflow_MD_fpocket_holo(self): }, _SBE.INPUT: { _SBE.INPUT_GENERIC: [ - { - _SBE.INPUT_SOURCE: "05_genion", - _SBE.INPUT_EXTENSION: "gro", - }, { _SBE.INPUT_SOURCE: "{file_base}/minim.mdp", _SBE.INPUT_EXTENSION: "mdp", - }, - { - _SBE.INPUT_SOURCE: "05_genion", - _SBE.INPUT_EXTENSION: "top", - }, - { - _SBE.INPUT_SOURCE: "01_pdb2gmx", - _SBE.INPUT_EXTENSION: "itp", - }, + } ] }, }, @@ -277,22 +238,10 @@ def 
test_workflow_MD_fpocket_holo(self): }, _SBE.INPUT: { _SBE.INPUT_GENERIC: [ - { - _SBE.INPUT_SOURCE: "07_eminim_mdrun", - _SBE.INPUT_EXTENSION: "gro", - }, - { - _SBE.INPUT_SOURCE: "05_genion", - _SBE.INPUT_EXTENSION: "top", - }, { _SBE.INPUT_SOURCE: "{file_base}/nvt_equil.mdp", _SBE.INPUT_EXTENSION: "mdp", - }, - { - _SBE.INPUT_SOURCE: "01_pdb2gmx", - _SBE.INPUT_EXTENSION: "itp", - }, + } ] }, }, @@ -336,22 +285,10 @@ def test_workflow_MD_fpocket_holo(self): }, _SBE.INPUT: { _SBE.INPUT_GENERIC: [ - { - _SBE.INPUT_SOURCE: "09_nvt_mdrun", - _SBE.INPUT_EXTENSION: "gro", - }, - { - _SBE.INPUT_SOURCE: "05_genion", - _SBE.INPUT_EXTENSION: "top", - }, { _SBE.INPUT_SOURCE: "{file_base}/npt_equil.mdp", _SBE.INPUT_EXTENSION: "mdp", - }, - { - _SBE.INPUT_SOURCE: "01_pdb2gmx", - _SBE.INPUT_EXTENSION: "itp", - }, + } ] }, }, @@ -396,22 +333,10 @@ def test_workflow_MD_fpocket_holo(self): }, _SBE.INPUT: { _SBE.INPUT_GENERIC: [ - { - _SBE.INPUT_SOURCE: "11_npt_mdrun", - _SBE.INPUT_EXTENSION: "gro", - }, - { - _SBE.INPUT_SOURCE: "05_genion", - _SBE.INPUT_EXTENSION: "top", - }, { _SBE.INPUT_SOURCE: "{file_base}/md.mdp", _SBE.INPUT_EXTENSION: "mdp", - }, - { - _SBE.INPUT_SOURCE: "01_pdb2gmx", - _SBE.INPUT_EXTENSION: "itp", - }, + } ] }, }, @@ -578,10 +503,6 @@ def test_md_ligparam(self): _WE.ENVIRONMENT_EXPORT_KEY: "GMX_FORCE_UPDATE_DEFAULT_GPU", _WE.ENVIRONMENT_EXPORT_VALUE: "True", }, - { - _WE.ENVIRONMENT_EXPORT_KEY: "ACPYPE", - _WE.ENVIRONMENT_EXPORT_VALUE: "/projects/cc/mai/binaries/acpype", - }, ] }, _WE.GLOBAL_VARIABLES: { @@ -628,20 +549,13 @@ def test_md_ligparam(self): _SBE.SETTINGS_ARGUMENTS: { _SBE.SETTINGS_ARGUMENTS_FLAGS: [], _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { - "-d": "1.5", + "-d": "1.2", "-bt": "dodecahedron", }, }, _SBE.SETTINGS_ADDITIONAL: {}, }, - _SBE.INPUT: { - _SBE.INPUT_GENERIC: [ - { - _SBE.INPUT_SOURCE: "01_pdb2gmx", - _SBE.INPUT_EXTENSION: "gro", - } - ] - }, + _SBE.INPUT: {}, }, { _SBE.STEPID: "03_solvate", @@ -656,18 +570,7 @@ def 
test_md_ligparam(self): }, _SBE.SETTINGS_ADDITIONAL: {}, }, - _SBE.INPUT: { - _SBE.INPUT_GENERIC: [ - { - _SBE.INPUT_SOURCE: "02_editconf", - _SBE.INPUT_EXTENSION: "gro", - }, - { - _SBE.INPUT_SOURCE: "01_pdb2gmx", - _SBE.INPUT_EXTENSION: "top", - }, - ] - }, + _SBE.INPUT: {_SBE.INPUT_GENERIC: []}, }, { _SBE.STEPID: "04_grompp", @@ -686,22 +589,10 @@ def test_md_ligparam(self): }, _SBE.INPUT: { _SBE.INPUT_GENERIC: [ - { - _SBE.INPUT_SOURCE: "03_solvate", - _SBE.INPUT_EXTENSION: "gro", - }, { _SBE.INPUT_SOURCE: "{file_base}/ions.mdp", _SBE.INPUT_EXTENSION: "mdp", }, - { - _SBE.INPUT_SOURCE: "03_solvate", - _SBE.INPUT_EXTENSION: "top", - }, - { - _SBE.INPUT_SOURCE: "01_pdb2gmx", - _SBE.INPUT_EXTENSION: "itp", - }, ] }, }, @@ -729,10 +620,6 @@ def test_md_ligparam(self): _SBE.INPUT_SOURCE: "04_grompp", _SBE.INPUT_EXTENSION: "tpr", }, - { - _SBE.INPUT_SOURCE: "04_grompp", - _SBE.INPUT_EXTENSION: "top", - }, ] }, }, @@ -745,7 +632,7 @@ def test_md_ligparam(self): _SBE.SETTINGS: { _SBE.SETTINGS_ARGUMENTS: { _SBE.SETTINGS_ARGUMENTS_FLAGS: [], - _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-maxwarn": 50}, }, _SBE.SETTINGS_ADDITIONAL: { "-r": False, @@ -753,22 +640,10 @@ def test_md_ligparam(self): }, _SBE.INPUT: { _SBE.INPUT_GENERIC: [ - { - _SBE.INPUT_SOURCE: "05_genion", - _SBE.INPUT_EXTENSION: "gro", - }, { _SBE.INPUT_SOURCE: "{file_base}/minim.mdp", _SBE.INPUT_EXTENSION: "mdp", }, - { - _SBE.INPUT_SOURCE: "05_genion", - _SBE.INPUT_EXTENSION: "top", - }, - { - _SBE.INPUT_SOURCE: "01_pdb2gmx", - _SBE.INPUT_EXTENSION: "itp", - }, ] }, }, @@ -813,22 +688,10 @@ def test_md_ligparam(self): }, _SBE.INPUT: { _SBE.INPUT_GENERIC: [ - { - _SBE.INPUT_SOURCE: "07_eminim_mdrun", - _SBE.INPUT_EXTENSION: "gro", - }, - { - _SBE.INPUT_SOURCE: "05_genion", - _SBE.INPUT_EXTENSION: "top", - }, { _SBE.INPUT_SOURCE: "{file_base}/nvt_equil.mdp", _SBE.INPUT_EXTENSION: "mdp", }, - { - _SBE.INPUT_SOURCE: "01_pdb2gmx", - _SBE.INPUT_EXTENSION: "itp", - }, ] }, }, 
@@ -875,22 +738,10 @@ def test_md_ligparam(self): }, _SBE.INPUT: { _SBE.INPUT_GENERIC: [ - { - _SBE.INPUT_SOURCE: "09_nvt_mdrun", - _SBE.INPUT_EXTENSION: "gro", - }, - { - _SBE.INPUT_SOURCE: "05_genion", - _SBE.INPUT_EXTENSION: "top", - }, { _SBE.INPUT_SOURCE: "{file_base}/npt_equil.mdp", _SBE.INPUT_EXTENSION: "mdp", }, - { - _SBE.INPUT_SOURCE: "01_pdb2gmx", - _SBE.INPUT_EXTENSION: "itp", - }, ] }, }, @@ -935,28 +786,14 @@ def test_md_ligparam(self): "-r": False, "fields": {"nsteps": "5000"}, "make_ndx_command": "auto", - "fields": {"nsteps": "5000"}, - "make_ndx_command": "auto", }, }, _SBE.INPUT: { _SBE.INPUT_GENERIC: [ - { - _SBE.INPUT_SOURCE: "11_npt_mdrun", - _SBE.INPUT_EXTENSION: "gro", - }, - { - _SBE.INPUT_SOURCE: "05_genion", - _SBE.INPUT_EXTENSION: "top", - }, { _SBE.INPUT_SOURCE: "{file_base}/md.mdp", _SBE.INPUT_EXTENSION: "mdp", }, - { - _SBE.INPUT_SOURCE: "01_pdb2gmx", - _SBE.INPUT_EXTENSION: "itp", - }, ] }, }, @@ -1068,4 +905,4 @@ def test_md_ligparam(self): out_path = os.path.join(self._test_dir, "md_0_1_0.xtc") stat_inf = os.stat(out_path) - self.assertGreater(stat_inf.st_size, 324000) + self.assertGreater(stat_inf.st_size, 316516) diff --git a/tests/integration_tests/test_pmx_rbfe.py b/tests/integration_tests/test_pmx_rbfe.py index 6efbf5a..b36d5d3 100644 --- a/tests/integration_tests/test_pmx_rbfe.py +++ b/tests/integration_tests/test_pmx_rbfe.py @@ -302,4 +302,4 @@ def test_pmx_rbfe(self): wflow.execute() out_path = os.path.join(self._test_dir, "resultsAll.csv") stat_inf = os.stat(out_path) - self.assertEqual(stat_inf.st_size, 100) + self.assertEqual(stat_inf.st_size, 223) diff --git a/tests/io/test_data_manipulation.py b/tests/io/test_data_manipulation.py index ebeef7b..2a6f389 100644 --- a/tests/io/test_data_manipulation.py +++ b/tests/io/test_data_manipulation.py @@ -224,7 +224,7 @@ def test_get_complexes(self): out_path = os.path.join(self._test_dir, "0:0:0.pdb") manip_step.write_generic_by_extension(self._test_dir, "pdb") stat_inf = 
os.stat(out_path) - self.assertGreater(stat_inf.st_size, 279700) + self.assertGreater(stat_inf.st_size, 6600) def test_filtering(self): step_conf = { diff --git a/tests/openff/__init__.py b/tests/openff/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/openff/test_openff2gmx.py b/tests/openff/test_openff2gmx.py new file mode 100644 index 0000000..2043944 --- /dev/null +++ b/tests/openff/test_openff2gmx.py @@ -0,0 +1,62 @@ +import unittest +import os +from icolos.core.containers.generic import GenericData +from icolos.core.workflow_steps.openff.openff2gmx import StepOFF2gmx +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum, StepOpenFFEnum +from tests.tests_paths import ( + MAIN_CONFIG, + PATHS_1UYD, + PATHS_EXAMPLEDATA, + create_test_dir, + export_unit_test_env_vars, +) +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SOFE = StepOpenFFEnum() +_SGE = StepGromacsEnum() + + +class Test_PMXgenlib(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/openff/openff2gmx") + + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(PATHS_1UYD.NATIVE_LIGAND_PDB, "r") as f: + pdb_data = f.readlines() + self.pdb_obj = GenericData(file_name="1UYD.pdb", file_data=pdb_data) + + def test_openff_ligand(self): + step_conf = { + _SBE.STEPID: "test_pdb2gmx", + _SBE.STEP_TYPE: "openff2gmx", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: { + _SOFE.UNIQUE_MOLS: [ + "COc1c(OC)cc(c(Cl)c1OC)Cc(n2)n(CCCC)c(c23)ncnc3N" + ], + _SOFE.FORCEFIELD: os.path.join( + MAIN_CONFIG["OPENFF_FORCEFIELDS"], "openff-2.0.0.offxml" + ), + }, + }, + } + step_openff2gmx = StepOFF2gmx(**step_conf) + step_openff2gmx.data.generic.add_file(self.pdb_obj) + step_openff2gmx.execute() + + 
out_path = os.path.join(self._test_dir, "MOL.gro") + step_openff2gmx.write_generic_by_extension( + self._test_dir, _SGE.FIELD_KEY_STRUCTURE + ) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 2400) diff --git a/tests/pmx/test_run_simulations.py b/tests/pmx/test_run_simulations.py index 0dde986..44a2143 100644 --- a/tests/pmx/test_run_simulations.py +++ b/tests/pmx/test_run_simulations.py @@ -80,7 +80,7 @@ def test_run_simulations_em(self): os.path.join(self._test_dir, "0ec09ef_4afa8f9/ligand/stateB/run1/em/md.log") ) - self.assertGreater(stat_inf.st_size, 1642929) + self.assertGreater(stat_inf.st_size, 1296303) stat_inf = os.stat( os.path.join( diff --git a/tests/schrodinger/test_fep_plus_setup.py b/tests/schrodinger/test_fep_plus_setup.py index e5d82f8..915b970 100644 --- a/tests/schrodinger/test_fep_plus_setup.py +++ b/tests/schrodinger/test_fep_plus_setup.py @@ -89,4 +89,4 @@ def test_fep_setup(self): path=os.path.join(self._test_dir, "test_out.fmp"), ext="fmp", join=False ) stat_inf = os.stat(out_path) - self.assertAlmostEqual(stat_inf.st_size, 848697, delta=500) + self.assertAlmostEqual(stat_inf.st_size, 847971, delta=500) diff --git a/tests/tests_paths.py b/tests/tests_paths.py index 6bc3219..5334bd5 100644 --- a/tests/tests_paths.py +++ b/tests/tests_paths.py @@ -19,7 +19,8 @@ attach_root_path("src/icolos/config/unit_tests_config/config.json"), "r" ) as f: MAIN_CONFIG = json.load(f) -except: +except Exception as e: + print(e) MAIN_CONFIG = {} @@ -57,8 +58,11 @@ class PATHS_1UYD: GRID_CONSTRAINTS_PATH = expand_path("Glide/1UYD_grid_constraints.zip") PDBQT_PATH = expand_path("AutoDockVina/1UYD_fixed.pdbqt") PDB_PATH = expand_path("molecules/1UYD/1UYD_apo.pdb") + HOLO_PDB = expand_path("molecules/1UYD/1UYD_holo.pdb") APO_MAE = expand_path("molecules/1UYD/1UYD_apo.mae") LIGANDS = expand_path("molecules/1UYD/1UYD_ligands.sdf") + GOLD_MOL2_PROTEIN = "Gold/1UYD_protein.mol2" + GOLD_CAVITY_MOL2 = "molecules/1UYD/PU8_reference_ligand.mol2" 
NATIVE_LIGAND_SDF = expand_path("molecules/1UYD/PU8_native_ligand.sdf") NATIVE_LIGAND_PDB = expand_path("molecules/1UYD/PU8_native_ligand.pdb") LIG4_POSES = expand_path("fep_plus/1UYD_ligand_subset.sdf") @@ -98,6 +102,7 @@ class PATHS_EXAMPLEDATA: "models/ePSA_Boltzmann_weighting.sdf" ) GLIDE_EXAMPLE_IN = expand_path("Glide/example.in") + GOLD_EXAMPLE_CONFIG = "Gold/gold.conf" EPSA_EXAMPLE_MOLECULE = expand_path("models/ePSA_example_mol.sdf") PRIME_RECEPTOR_COX2 = expand_path("prime/cox2_receptor.mae") PRIME_COX2_GRID = expand_path("molecules/1CX2/1cx2_GridGen.zip")