From 046469f6dc925cf4696dca6d3c0345776265d018 Mon Sep 17 00:00:00 2001 From: kdmh016 Date: Fri, 28 Jan 2022 16:42:12 +0100 Subject: [PATCH] Version 1.4.2, reinitialized. --- .gitignore | 16 + CHANGELOG.md | 68 + DOCKERFILE | 7 + LICENCE | 169 ++ README.md | 75 + environment_min.yml | 24 + examples/hpc_script/NIBR_reinvent.sh | 11 + examples/hpc_script/SLURM_script.sh | 11 + examples/workflow/desmond_md.json | 72 + .../docking/active_learning_docking.json | 68 + examples/workflow/docking/adv_docking.json | 120 ++ .../docking/adv_target_preparation.json | 44 + examples/workflow/docking/glide_docking.json | 122 ++ examples/workflow/fep_plus/fep_plus_aws.json | 171 ++ .../gromacs/gromacs_ensemble_mmgbsa.json | 619 +++++++ .../workflow/gromacs/gromacs_fpocket.json | 656 ++++++++ examples/workflow/gromacs/gromacs_md.json | 488 ++++++ examples/workflow/gromacs/gromacs_mmgbsa.json | 580 +++++++ examples/workflow/input_file_types.json | 91 ++ examples/workflow/pmx/pmx_rbfe.json | 376 +++++ examples/workflow/qm/ePSA_permeability.json | 334 ++++ examples/workflow/qm/full_rescoss.json | 464 ++++++ .../workflow/reinvent/feature_counter.json | 71 + .../reinvent/nibr_local_reinvent.json | 159 ++ .../rescoring/negative_image_panther.json | 58 + examples/workflow/rescoring/nibr_local.json | 161 ++ .../workflow/rescoring/rmsd_rescoring.json | 223 +++ executor.py | 95 ++ external_documentation/REINVENT_input.json | 4 + external_documentation/REINVENT_result.json | 9 + external_documentation/fep_mapper.txt | 102 ++ external_documentation/fep_plus.txt | 109 ++ external_documentation/prime_arguments.txt | 180 +++ icolos/__init__.py | 0 icolos/config/amber/default_mmpbsa.in | 6 + icolos/config/cosmo/default_cosmo.config | 28 + icolos/config/desmond/config.msj | 23 + icolos/config/desmond/prod.cfg | 69 + icolos/config/desmond/production.msj | 102 ++ icolos/config/logging/debug.json | 75 + icolos/config/logging/default.json | 75 + icolos/config/logging/tutorial.json | 75 + icolos/config/logging/verbose.json | 75 + icolos/config/panther/default_panther.in | 74 + .../unit_tests_config/cosmo/cosmo.config | 28 + .../cosmo/example.cosmo.config | 28 + .../unit_tests_config/example.config.json | 30 + icolos/config_containers/__init__.py | 0 icolos/config_containers/container.py | 33 + .../config_containers/workflow_container.py | 13 + icolos/core/__init__.py | 0 icolos/core/composite_agents/__init__.py | 0 icolos/core/composite_agents/base_agent.py | 108 ++ icolos/core/composite_agents/scheduler.py | 54 + icolos/core/composite_agents/workflow.py | 143 ++ icolos/core/containers/__init__.py | 0 icolos/core/containers/compound.py | 549 +++++++ icolos/core/containers/generic.py | 210 +++ icolos/core/containers/perturbation_map.py | 294 ++++ icolos/core/flow_control/__init__.py | 0 icolos/core/flow_control/flow_control.py | 68 + icolos/core/flow_control/iterator.py | 223 +++ icolos/core/job_control/__init__.py | 0 icolos/core/job_control/job_control.py | 89 + icolos/core/step_utils/__init__.py | 0 icolos/core/step_utils/input_merger.py | 114 ++ icolos/core/step_utils/input_preparator.py | 535 ++++++ icolos/core/step_utils/rdkit_utils.py | 10 + icolos/core/step_utils/retry.py | 42 + .../core/step_utils/run_variables_resolver.py | 67 + icolos/core/step_utils/sdconvert_util.py | 68 + icolos/core/step_utils/step_writeout.py | 507 ++++++ icolos/core/step_utils/structcat_util.py | 68 + icolos/core/step_utils/structconvert.py | 69 + icolos/core/steps_utils.py | 22 + icolos/core/workflow_steps/__init__.py | 0 
.../workflow_steps/autodockvina/__init__.py | 0 .../workflow_steps/autodockvina/docking.py | 324 ++++ .../autodockvina/target_preparation.py | 137 ++ .../workflow_steps/calculation/__init__.py | 0 .../core/workflow_steps/calculation/base.py | 52 + .../calculation/boltzmann_weighting.py | 98 ++ .../workflow_steps/calculation/clustering.py | 140 ++ .../core/workflow_steps/calculation/cosmo.py | 311 ++++ .../calculation/electrostatics/__init__.py | 0 .../calculation/electrostatics/cresset_ec.py | 108 ++ .../calculation/electrostatics/esp_sim.py | 152 ++ .../calculation/feature_counter.py | 64 + .../workflow_steps/calculation/panther.py | 152 ++ .../workflow_steps/calculation/rms_filter.py | 97 ++ .../core/workflow_steps/calculation/rmsd.py | 47 + .../core/workflow_steps/calculation/shaep.py | 77 + .../workflow_steps/calculation/turbomole.py | 440 +++++ .../cavity_explorer/__init__.py | 0 .../workflow_steps/cavity_explorer/base.py | 69 + .../cavity_explorer/mdpocket.py | 306 ++++ .../core/workflow_steps/confgen/__init__.py | 0 icolos/core/workflow_steps/confgen/base.py | 7 + icolos/core/workflow_steps/confgen/crest.py | 121 ++ icolos/core/workflow_steps/confgen/omega.py | 111 ++ icolos/core/workflow_steps/confgen/xtb.py | 170 ++ .../core/workflow_steps/gromacs/__init__.py | 10 + icolos/core/workflow_steps/gromacs/base.py | 195 +++ icolos/core/workflow_steps/gromacs/cluster.py | 81 + .../workflow_steps/gromacs/clusters_ts.py | 88 + icolos/core/workflow_steps/gromacs/do_dssp.py | 57 + .../core/workflow_steps/gromacs/editconf.py | 57 + icolos/core/workflow_steps/gromacs/genion.py | 69 + icolos/core/workflow_steps/gromacs/grompp.py | 125 ++ icolos/core/workflow_steps/gromacs/mdrun.py | 67 + icolos/core/workflow_steps/gromacs/mmpbsa.py | 161 ++ icolos/core/workflow_steps/gromacs/pdb2gmx.py | 455 ++++++ icolos/core/workflow_steps/gromacs/rsmd.py | 59 + icolos/core/workflow_steps/gromacs/solvate.py | 46 + icolos/core/workflow_steps/gromacs/trajcat.py | 54 + icolos/core/workflow_steps/gromacs/trjconv.py | 50 + icolos/core/workflow_steps/io/__init__.py | 0 icolos/core/workflow_steps/io/base.py | 10 + .../workflow_steps/io/data_manipulation.py | 248 +++ icolos/core/workflow_steps/io/embedder.py | 136 ++ .../workflow_steps/io/initialize_compound.py | 20 + icolos/core/workflow_steps/pmx/__init__.py | 14 + icolos/core/workflow_steps/pmx/abfe.py | 149 ++ .../workflow_steps/pmx/assemble_systems.py | 53 + icolos/core/workflow_steps/pmx/atomMapping.py | 86 + icolos/core/workflow_steps/pmx/base.py | 255 +++ .../core/workflow_steps/pmx/box_water_ions.py | 58 + icolos/core/workflow_steps/pmx/doublebox.py | 33 + icolos/core/workflow_steps/pmx/genlib.py | 68 + icolos/core/workflow_steps/pmx/gentop.py | 46 + .../core/workflow_steps/pmx/ligandHybrid.py | 121 ++ icolos/core/workflow_steps/pmx/mutate.py | 67 + .../workflow_steps/pmx/prepare_simulations.py | 51 + .../workflow_steps/pmx/prepare_transitions.py | 48 + .../core/workflow_steps/pmx/run_analysis.py | 47 + .../workflow_steps/pmx/run_simulations.py | 58 + .../core/workflow_steps/pmx/setup_workpath.py | 192 +++ .../workflow_steps/prediction/__init__.py | 0 .../prediction/active_learning.py | 267 +++ .../prediction/model_building.py | 269 ++++ .../workflow_steps/prediction/predictor.py | 86 + .../workflow_steps/schrodinger/__init__.py | 9 + .../core/workflow_steps/schrodinger/base.py | 366 +++++ .../schrodinger/desmond_exec.py | 114 ++ .../schrodinger/desmond_preprocessor.py | 75 + .../schrodinger/fep_analysis.py | 27 + .../workflow_steps/schrodinger/fep_base.py | 
211 +++ .../schrodinger/fep_plus_execution.py | 192 +++ .../schrodinger/fep_plus_setup.py | 303 ++++ .../core/workflow_steps/schrodinger/glide.py | 635 ++++++++ .../workflow_steps/schrodinger/ligprep.py | 322 ++++ .../workflow_steps/schrodinger/macromodel.py | 160 ++ .../workflow_steps/schrodinger/prepwizard.py | 90 ++ .../core/workflow_steps/schrodinger/prime.py | 239 +++ icolos/core/workflow_steps/step.py | 494 ++++++ .../structure_prediction/__init__.py | 0 .../structure_prediction/disicl.py | 0 .../structure_prediction/dssp.py | 56 + .../structure_prediction/pdb_fixer.py | 66 + .../structure_prediction/peptide_embedder.py | 36 + .../structure_prediction/rosetta_abinitio.py | 15 + icolos/loggers/__init__.py | 0 icolos/loggers/agentlogger.py | 12 + icolos/loggers/base_logger.py | 27 + icolos/loggers/blank_logger.py | 14 + icolos/loggers/entrypoint_logger.py | 12 + icolos/loggers/iologger.py | 12 + icolos/loggers/logger_utils.py | 4 + icolos/loggers/steplogger.py | 12 + icolos/scripts/__init__.py | 0 icolos/scripts/cli.py | 103 ++ icolos/utils/__init__.py | 0 icolos/utils/constants.py | 2 + .../utils/entry_point_functions/__init__.py | 0 .../logging_helper_functions.py | 29 + .../parsing_functions.py | 75 + icolos/utils/enums/__init__.py | 0 icolos/utils/enums/composite_agents_enums.py | 59 + icolos/utils/enums/compound_enums.py | 56 + icolos/utils/enums/entry_points.py | 15 + icolos/utils/enums/execution_enums.py | 13 + icolos/utils/enums/flow_control_enums.py | 15 + icolos/utils/enums/general_utils_enums.py | 15 + icolos/utils/enums/input_enums.py | 19 + icolos/utils/enums/logging_enums.py | 31 + icolos/utils/enums/parallelization.py | 18 + icolos/utils/enums/program_parameters.py | 1428 +++++++++++++++++ icolos/utils/enums/step_enums.py | 922 +++++++++++ .../utils/enums/step_initialization_enum.py | 110 ++ icolos/utils/enums/write_out_enums.py | 47 + icolos/utils/execute_external/__init__.py | 0 icolos/utils/execute_external/autodockvina.py | 41 + .../utils/execute_external/batch_executor.py | 145 ++ .../execute_external/cresset_executor.py | 40 + icolos/utils/execute_external/crest.py | 41 + icolos/utils/execute_external/execute.py | 101 ++ icolos/utils/execute_external/fep_plus.py | 48 + icolos/utils/execute_external/glide.py | 44 + icolos/utils/execute_external/gromacs.py | 53 + icolos/utils/execute_external/ifd_executor.py | 40 + .../execute_external/license_token_guard.py | 127 ++ icolos/utils/execute_external/ligprep.py | 47 + icolos/utils/execute_external/macromodel.py | 46 + icolos/utils/execute_external/omega.py | 42 + icolos/utils/execute_external/openbabel.py | 43 + icolos/utils/execute_external/pmx.py | 54 + icolos/utils/execute_external/prime.py | 43 + icolos/utils/execute_external/schrodinger.py | 58 + icolos/utils/execute_external/sdconvert.py | 46 + icolos/utils/execute_external/structcat.py | 46 + icolos/utils/execute_external/turbomole.py | 62 + icolos/utils/execute_external/xtb.py | 40 + icolos/utils/general/__init__.py | 0 .../utils/general/arparse_bool_extension.py | 14 + icolos/utils/general/convenience_functions.py | 60 + icolos/utils/general/files_paths.py | 105 ++ icolos/utils/general/icolos_exceptions.py | 25 + icolos/utils/general/molecules.py | 20 + icolos/utils/general/notifications.py | 0 icolos/utils/general/parallelization.py | 159 ++ icolos/utils/general/print_log.py | 14 + icolos/utils/general/progress_bar.py | 7 + icolos/utils/general/strings.py | 8 + icolos/utils/smiles.py | 145 ++ icolos_workflow.py | 65 + integration_tests.py | 5 + 
licences/espsim_licence.txt | 21 + pyproject.toml | 3 + sdf2smi.py | 99 ++ setup.py | 17 + tests/CREST/__init__.py | 1 + tests/CREST/test_CREST_confgen.py | 115 ++ tests/OMEGA/__init__.py | 1 + tests/OMEGA/test_OMEGA_confgen.py | 124 ++ tests/Turbomole/__init__.py | 1 + tests/Turbomole/test_Turbomole.py | 253 +++ tests/XTB/__init__.py | 1 + tests/XTB/test_XTB_confgen.py | 207 +++ tests/__init__.py | 0 tests/autodockvina/__init__.py | 2 + tests/autodockvina/test_adv_docking.py | 95 ++ tests/autodockvina/test_adv_target_prep.py | 90 ++ tests/boltzmann_weighting/__init__.py | 1 + .../test_boltzmann_weighting.py | 130 ++ tests/cavity_explorer/__init__.py | 1 + tests/cavity_explorer/test_md_pocket.py | 115 ++ tests/clustering/__init__.py | 1 + tests/clustering/test_clustering.py | 91 ++ tests/composite_agents/__init__.py | 1 + tests/composite_agents/test_workflow.py | 451 ++++++ tests/containers/__init__.py | 3 + tests/containers/test_compound.py | 113 ++ tests/containers/test_generic.py | 32 + tests/containers/test_perturbation_map.py | 44 + tests/cosmo/__init__.py | 1 + tests/cosmo/test_Cosmo.py | 174 ++ tests/esp_sim/__init__.py | 1 + tests/esp_sim/test_esp_sim.py | 85 + tests/feature_counter/__init__.py | 1 + tests/feature_counter/test_feature_counter.py | 102 ++ tests/flow_control/__init__.py | 1 + tests/flow_control/test_iterator.py | 197 +++ tests/gromacs/__init__.py | 14 + tests/gromacs/test_cluster.py | 70 + tests/gromacs/test_cluster_ts.py | 55 + tests/gromacs/test_do_dssp.py | 54 + tests/gromacs/test_editconf.py | 54 + tests/gromacs/test_genion.py | 59 + tests/gromacs/test_grompp.py | 72 + tests/gromacs/test_mdrun.py | 70 + tests/gromacs/test_mmpbsa.py | 142 ++ tests/gromacs/test_pdb2gmx.py | 91 ++ tests/gromacs/test_removelig.py | 1 + tests/gromacs/test_rmsd.py | 56 + tests/gromacs/test_solvate.py | 60 + tests/gromacs/test_trjcat.py | 56 + tests/gromacs/test_trjconv.py | 53 + tests/integration_tests/__init__.py | 5 + tests/integration_tests/test_docking.py | 239 +++ tests/integration_tests/test_fep_plus.py | 224 +++ tests/integration_tests/test_gromacs.py | 1076 +++++++++++++ tests/integration_tests/test_rmsd_iter.py | 344 ++++ tests/io/__init__.py | 3 + tests/io/test_data_manipulation.py | 325 ++++ tests/io/test_embedder.py | 135 ++ tests/io/test_initialize_compound.py | 240 +++ tests/panther/__init__.py | 1 + tests/panther/test_panther.py | 48 + tests/pmx/__init__.py | 11 + tests/pmx/test_abfe.py | 62 + tests/pmx/test_analyse.py | 65 + tests/pmx/test_assemble_systems.py | 77 + tests/pmx/test_atomMapping.py | 67 + tests/pmx/test_box_water_ions.py | 70 + tests/pmx/test_doublebox.py | 20 + tests/pmx/test_genlib.py | 20 + tests/pmx/test_gentop.py | 20 + tests/pmx/test_ligandHybrid.py | 70 + tests/pmx/test_mutate.py | 20 + tests/pmx/test_prepare_simulations.py | 73 + tests/pmx/test_prepare_transitions.py | 95 ++ tests/pmx/test_run_simulations.py | 113 ++ tests/pmx/test_setup_workpath.py | 80 + tests/prediction/__init__.py | 3 + tests/prediction/test_active_learning.py | 100 ++ tests/prediction/test_model_building.py | 91 ++ tests/prediction/test_predictor.py | 68 + tests/rms_filter/__init__.py | 1 + tests/rms_filter/test_rmsfilter.py | 203 +++ tests/rmsd/__init__.py | 1 + tests/rmsd/test_rmsd.py | 112 ++ tests/schrodinger/__init__.py | 10 + tests/schrodinger/test_desmond_production.py | 48 + tests/schrodinger/test_desmond_setup.py | 48 + tests/schrodinger/test_fep_analysis.py | 105 ++ tests/schrodinger/test_fep_plus_execution.py | 124 ++ tests/schrodinger/test_fep_plus_setup.py | 92 ++ 
tests/schrodinger/test_glide.py | 511 ++++++ tests/schrodinger/test_ligprep.py | 220 +++ tests/schrodinger/test_macromodel.py | 93 ++ tests/schrodinger/test_prepwizard.py | 118 ++ tests/schrodinger/test_prime.py | 195 +++ tests/shaep/__init__.py | 1 + tests/shaep/test_shaep.py | 72 + tests/step_utils/__init__.py | 4 + tests/step_utils/test_input_merger.py | 262 +++ tests/step_utils/test_input_preparator.py | 101 ++ .../step_utils/test_run_variables_resolver.py | 164 ++ tests/step_utils/test_structconvert.py | 32 + tests/step_utils/test_writeout.py | 335 ++++ tests/structure_prediction/__init__.py | 5 + tests/structure_prediction/test_dssp.py | 52 + tests/structure_prediction/test_pdb_fixer.py | 63 + .../test_peptide_embedder.py | 51 + tests/tests_paths.py | 396 +++++ unit_tests.py | 29 + 336 files changed, 35659 insertions(+) create mode 100644 .gitignore create mode 100644 CHANGELOG.md create mode 100644 DOCKERFILE create mode 100644 LICENCE create mode 100644 README.md create mode 100644 environment_min.yml create mode 100644 examples/hpc_script/NIBR_reinvent.sh create mode 100644 examples/hpc_script/SLURM_script.sh create mode 100644 examples/workflow/desmond_md.json create mode 100644 examples/workflow/docking/active_learning_docking.json create mode 100644 examples/workflow/docking/adv_docking.json create mode 100644 examples/workflow/docking/adv_target_preparation.json create mode 100644 examples/workflow/docking/glide_docking.json create mode 100644 examples/workflow/fep_plus/fep_plus_aws.json create mode 100644 examples/workflow/gromacs/gromacs_ensemble_mmgbsa.json create mode 100644 examples/workflow/gromacs/gromacs_fpocket.json create mode 100644 examples/workflow/gromacs/gromacs_md.json create mode 100644 examples/workflow/gromacs/gromacs_mmgbsa.json create mode 100644 examples/workflow/input_file_types.json create mode 100644 examples/workflow/pmx/pmx_rbfe.json create mode 100644 examples/workflow/qm/ePSA_permeability.json create mode 100644 examples/workflow/qm/full_rescoss.json create mode 100644 examples/workflow/reinvent/feature_counter.json create mode 100644 examples/workflow/reinvent/nibr_local_reinvent.json create mode 100644 examples/workflow/rescoring/negative_image_panther.json create mode 100644 examples/workflow/rescoring/nibr_local.json create mode 100644 examples/workflow/rescoring/rmsd_rescoring.json create mode 100644 executor.py create mode 100644 external_documentation/REINVENT_input.json create mode 100644 external_documentation/REINVENT_result.json create mode 100644 external_documentation/fep_mapper.txt create mode 100644 external_documentation/fep_plus.txt create mode 100644 external_documentation/prime_arguments.txt create mode 100644 icolos/__init__.py create mode 100644 icolos/config/amber/default_mmpbsa.in create mode 100644 icolos/config/cosmo/default_cosmo.config create mode 100644 icolos/config/desmond/config.msj create mode 100644 icolos/config/desmond/prod.cfg create mode 100644 icolos/config/desmond/production.msj create mode 100644 icolos/config/logging/debug.json create mode 100644 icolos/config/logging/default.json create mode 100644 icolos/config/logging/tutorial.json create mode 100644 icolos/config/logging/verbose.json create mode 100644 icolos/config/panther/default_panther.in create mode 100644 icolos/config/unit_tests_config/cosmo/cosmo.config create mode 100644 icolos/config/unit_tests_config/cosmo/example.cosmo.config create mode 100644 icolos/config/unit_tests_config/example.config.json create mode 100644 
icolos/config_containers/__init__.py create mode 100644 icolos/config_containers/container.py create mode 100644 icolos/config_containers/workflow_container.py create mode 100644 icolos/core/__init__.py create mode 100644 icolos/core/composite_agents/__init__.py create mode 100644 icolos/core/composite_agents/base_agent.py create mode 100644 icolos/core/composite_agents/scheduler.py create mode 100644 icolos/core/composite_agents/workflow.py create mode 100644 icolos/core/containers/__init__.py create mode 100644 icolos/core/containers/compound.py create mode 100644 icolos/core/containers/generic.py create mode 100644 icolos/core/containers/perturbation_map.py create mode 100644 icolos/core/flow_control/__init__.py create mode 100644 icolos/core/flow_control/flow_control.py create mode 100644 icolos/core/flow_control/iterator.py create mode 100644 icolos/core/job_control/__init__.py create mode 100644 icolos/core/job_control/job_control.py create mode 100644 icolos/core/step_utils/__init__.py create mode 100644 icolos/core/step_utils/input_merger.py create mode 100644 icolos/core/step_utils/input_preparator.py create mode 100644 icolos/core/step_utils/rdkit_utils.py create mode 100644 icolos/core/step_utils/retry.py create mode 100644 icolos/core/step_utils/run_variables_resolver.py create mode 100644 icolos/core/step_utils/sdconvert_util.py create mode 100644 icolos/core/step_utils/step_writeout.py create mode 100644 icolos/core/step_utils/structcat_util.py create mode 100644 icolos/core/step_utils/structconvert.py create mode 100644 icolos/core/steps_utils.py create mode 100644 icolos/core/workflow_steps/__init__.py create mode 100644 icolos/core/workflow_steps/autodockvina/__init__.py create mode 100644 icolos/core/workflow_steps/autodockvina/docking.py create mode 100644 icolos/core/workflow_steps/autodockvina/target_preparation.py create mode 100644 icolos/core/workflow_steps/calculation/__init__.py create mode 100644 icolos/core/workflow_steps/calculation/base.py create mode 100644 icolos/core/workflow_steps/calculation/boltzmann_weighting.py create mode 100644 icolos/core/workflow_steps/calculation/clustering.py create mode 100644 icolos/core/workflow_steps/calculation/cosmo.py create mode 100644 icolos/core/workflow_steps/calculation/electrostatics/__init__.py create mode 100644 icolos/core/workflow_steps/calculation/electrostatics/cresset_ec.py create mode 100644 icolos/core/workflow_steps/calculation/electrostatics/esp_sim.py create mode 100644 icolos/core/workflow_steps/calculation/feature_counter.py create mode 100644 icolos/core/workflow_steps/calculation/panther.py create mode 100644 icolos/core/workflow_steps/calculation/rms_filter.py create mode 100644 icolos/core/workflow_steps/calculation/rmsd.py create mode 100644 icolos/core/workflow_steps/calculation/shaep.py create mode 100644 icolos/core/workflow_steps/calculation/turbomole.py create mode 100644 icolos/core/workflow_steps/cavity_explorer/__init__.py create mode 100644 icolos/core/workflow_steps/cavity_explorer/base.py create mode 100644 icolos/core/workflow_steps/cavity_explorer/mdpocket.py create mode 100644 icolos/core/workflow_steps/confgen/__init__.py create mode 100644 icolos/core/workflow_steps/confgen/base.py create mode 100644 icolos/core/workflow_steps/confgen/crest.py create mode 100644 icolos/core/workflow_steps/confgen/omega.py create mode 100644 icolos/core/workflow_steps/confgen/xtb.py create mode 100644 icolos/core/workflow_steps/gromacs/__init__.py create mode 100644 
icolos/core/workflow_steps/gromacs/base.py create mode 100644 icolos/core/workflow_steps/gromacs/cluster.py create mode 100644 icolos/core/workflow_steps/gromacs/clusters_ts.py create mode 100644 icolos/core/workflow_steps/gromacs/do_dssp.py create mode 100644 icolos/core/workflow_steps/gromacs/editconf.py create mode 100644 icolos/core/workflow_steps/gromacs/genion.py create mode 100644 icolos/core/workflow_steps/gromacs/grompp.py create mode 100644 icolos/core/workflow_steps/gromacs/mdrun.py create mode 100644 icolos/core/workflow_steps/gromacs/mmpbsa.py create mode 100644 icolos/core/workflow_steps/gromacs/pdb2gmx.py create mode 100644 icolos/core/workflow_steps/gromacs/rsmd.py create mode 100644 icolos/core/workflow_steps/gromacs/solvate.py create mode 100644 icolos/core/workflow_steps/gromacs/trajcat.py create mode 100644 icolos/core/workflow_steps/gromacs/trjconv.py create mode 100644 icolos/core/workflow_steps/io/__init__.py create mode 100644 icolos/core/workflow_steps/io/base.py create mode 100644 icolos/core/workflow_steps/io/data_manipulation.py create mode 100644 icolos/core/workflow_steps/io/embedder.py create mode 100644 icolos/core/workflow_steps/io/initialize_compound.py create mode 100644 icolos/core/workflow_steps/pmx/__init__.py create mode 100644 icolos/core/workflow_steps/pmx/abfe.py create mode 100644 icolos/core/workflow_steps/pmx/assemble_systems.py create mode 100644 icolos/core/workflow_steps/pmx/atomMapping.py create mode 100644 icolos/core/workflow_steps/pmx/base.py create mode 100644 icolos/core/workflow_steps/pmx/box_water_ions.py create mode 100644 icolos/core/workflow_steps/pmx/doublebox.py create mode 100644 icolos/core/workflow_steps/pmx/genlib.py create mode 100644 icolos/core/workflow_steps/pmx/gentop.py create mode 100644 icolos/core/workflow_steps/pmx/ligandHybrid.py create mode 100644 icolos/core/workflow_steps/pmx/mutate.py create mode 100644 icolos/core/workflow_steps/pmx/prepare_simulations.py create mode 100644 icolos/core/workflow_steps/pmx/prepare_transitions.py create mode 100644 icolos/core/workflow_steps/pmx/run_analysis.py create mode 100644 icolos/core/workflow_steps/pmx/run_simulations.py create mode 100644 icolos/core/workflow_steps/pmx/setup_workpath.py create mode 100644 icolos/core/workflow_steps/prediction/__init__.py create mode 100644 icolos/core/workflow_steps/prediction/active_learning.py create mode 100644 icolos/core/workflow_steps/prediction/model_building.py create mode 100644 icolos/core/workflow_steps/prediction/predictor.py create mode 100644 icolos/core/workflow_steps/schrodinger/__init__.py create mode 100644 icolos/core/workflow_steps/schrodinger/base.py create mode 100644 icolos/core/workflow_steps/schrodinger/desmond_exec.py create mode 100644 icolos/core/workflow_steps/schrodinger/desmond_preprocessor.py create mode 100644 icolos/core/workflow_steps/schrodinger/fep_analysis.py create mode 100644 icolos/core/workflow_steps/schrodinger/fep_base.py create mode 100644 icolos/core/workflow_steps/schrodinger/fep_plus_execution.py create mode 100644 icolos/core/workflow_steps/schrodinger/fep_plus_setup.py create mode 100644 icolos/core/workflow_steps/schrodinger/glide.py create mode 100644 icolos/core/workflow_steps/schrodinger/ligprep.py create mode 100644 icolos/core/workflow_steps/schrodinger/macromodel.py create mode 100644 icolos/core/workflow_steps/schrodinger/prepwizard.py create mode 100644 icolos/core/workflow_steps/schrodinger/prime.py create mode 100644 icolos/core/workflow_steps/step.py create mode 100644 
icolos/core/workflow_steps/structure_prediction/__init__.py create mode 100644 icolos/core/workflow_steps/structure_prediction/disicl.py create mode 100644 icolos/core/workflow_steps/structure_prediction/dssp.py create mode 100644 icolos/core/workflow_steps/structure_prediction/pdb_fixer.py create mode 100644 icolos/core/workflow_steps/structure_prediction/peptide_embedder.py create mode 100644 icolos/core/workflow_steps/structure_prediction/rosetta_abinitio.py create mode 100644 icolos/loggers/__init__.py create mode 100644 icolos/loggers/agentlogger.py create mode 100644 icolos/loggers/base_logger.py create mode 100644 icolos/loggers/blank_logger.py create mode 100644 icolos/loggers/entrypoint_logger.py create mode 100644 icolos/loggers/iologger.py create mode 100644 icolos/loggers/logger_utils.py create mode 100644 icolos/loggers/steplogger.py create mode 100644 icolos/scripts/__init__.py create mode 100644 icolos/scripts/cli.py create mode 100644 icolos/utils/__init__.py create mode 100644 icolos/utils/constants.py create mode 100644 icolos/utils/entry_point_functions/__init__.py create mode 100644 icolos/utils/entry_point_functions/logging_helper_functions.py create mode 100644 icolos/utils/entry_point_functions/parsing_functions.py create mode 100644 icolos/utils/enums/__init__.py create mode 100644 icolos/utils/enums/composite_agents_enums.py create mode 100644 icolos/utils/enums/compound_enums.py create mode 100644 icolos/utils/enums/entry_points.py create mode 100644 icolos/utils/enums/execution_enums.py create mode 100644 icolos/utils/enums/flow_control_enums.py create mode 100644 icolos/utils/enums/general_utils_enums.py create mode 100644 icolos/utils/enums/input_enums.py create mode 100644 icolos/utils/enums/logging_enums.py create mode 100644 icolos/utils/enums/parallelization.py create mode 100644 icolos/utils/enums/program_parameters.py create mode 100644 icolos/utils/enums/step_enums.py create mode 100644 icolos/utils/enums/step_initialization_enum.py create mode 100644 icolos/utils/enums/write_out_enums.py create mode 100644 icolos/utils/execute_external/__init__.py create mode 100644 icolos/utils/execute_external/autodockvina.py create mode 100644 icolos/utils/execute_external/batch_executor.py create mode 100644 icolos/utils/execute_external/cresset_executor.py create mode 100644 icolos/utils/execute_external/crest.py create mode 100644 icolos/utils/execute_external/execute.py create mode 100644 icolos/utils/execute_external/fep_plus.py create mode 100644 icolos/utils/execute_external/glide.py create mode 100644 icolos/utils/execute_external/gromacs.py create mode 100644 icolos/utils/execute_external/ifd_executor.py create mode 100644 icolos/utils/execute_external/license_token_guard.py create mode 100644 icolos/utils/execute_external/ligprep.py create mode 100644 icolos/utils/execute_external/macromodel.py create mode 100644 icolos/utils/execute_external/omega.py create mode 100644 icolos/utils/execute_external/openbabel.py create mode 100644 icolos/utils/execute_external/pmx.py create mode 100644 icolos/utils/execute_external/prime.py create mode 100644 icolos/utils/execute_external/schrodinger.py create mode 100644 icolos/utils/execute_external/sdconvert.py create mode 100644 icolos/utils/execute_external/structcat.py create mode 100644 icolos/utils/execute_external/turbomole.py create mode 100644 icolos/utils/execute_external/xtb.py create mode 100644 icolos/utils/general/__init__.py create mode 100644 icolos/utils/general/arparse_bool_extension.py create mode 
100644 icolos/utils/general/convenience_functions.py create mode 100644 icolos/utils/general/files_paths.py create mode 100644 icolos/utils/general/icolos_exceptions.py create mode 100644 icolos/utils/general/molecules.py create mode 100644 icolos/utils/general/notifications.py create mode 100644 icolos/utils/general/parallelization.py create mode 100644 icolos/utils/general/print_log.py create mode 100644 icolos/utils/general/progress_bar.py create mode 100644 icolos/utils/general/strings.py create mode 100644 icolos/utils/smiles.py create mode 100644 icolos_workflow.py create mode 100644 integration_tests.py create mode 100644 licences/espsim_licence.txt create mode 100644 pyproject.toml create mode 100644 sdf2smi.py create mode 100644 setup.py create mode 100644 tests/CREST/__init__.py create mode 100644 tests/CREST/test_CREST_confgen.py create mode 100644 tests/OMEGA/__init__.py create mode 100644 tests/OMEGA/test_OMEGA_confgen.py create mode 100644 tests/Turbomole/__init__.py create mode 100644 tests/Turbomole/test_Turbomole.py create mode 100644 tests/XTB/__init__.py create mode 100644 tests/XTB/test_XTB_confgen.py create mode 100644 tests/__init__.py create mode 100644 tests/autodockvina/__init__.py create mode 100644 tests/autodockvina/test_adv_docking.py create mode 100644 tests/autodockvina/test_adv_target_prep.py create mode 100644 tests/boltzmann_weighting/__init__.py create mode 100644 tests/boltzmann_weighting/test_boltzmann_weighting.py create mode 100644 tests/cavity_explorer/__init__.py create mode 100644 tests/cavity_explorer/test_md_pocket.py create mode 100644 tests/clustering/__init__.py create mode 100644 tests/clustering/test_clustering.py create mode 100644 tests/composite_agents/__init__.py create mode 100644 tests/composite_agents/test_workflow.py create mode 100644 tests/containers/__init__.py create mode 100644 tests/containers/test_compound.py create mode 100644 tests/containers/test_generic.py create mode 100644 tests/containers/test_perturbation_map.py create mode 100644 tests/cosmo/__init__.py create mode 100644 tests/cosmo/test_Cosmo.py create mode 100644 tests/esp_sim/__init__.py create mode 100644 tests/esp_sim/test_esp_sim.py create mode 100644 tests/feature_counter/__init__.py create mode 100644 tests/feature_counter/test_feature_counter.py create mode 100644 tests/flow_control/__init__.py create mode 100644 tests/flow_control/test_iterator.py create mode 100644 tests/gromacs/__init__.py create mode 100644 tests/gromacs/test_cluster.py create mode 100644 tests/gromacs/test_cluster_ts.py create mode 100644 tests/gromacs/test_do_dssp.py create mode 100644 tests/gromacs/test_editconf.py create mode 100644 tests/gromacs/test_genion.py create mode 100644 tests/gromacs/test_grompp.py create mode 100644 tests/gromacs/test_mdrun.py create mode 100644 tests/gromacs/test_mmpbsa.py create mode 100644 tests/gromacs/test_pdb2gmx.py create mode 100644 tests/gromacs/test_removelig.py create mode 100644 tests/gromacs/test_rmsd.py create mode 100644 tests/gromacs/test_solvate.py create mode 100644 tests/gromacs/test_trjcat.py create mode 100644 tests/gromacs/test_trjconv.py create mode 100644 tests/integration_tests/__init__.py create mode 100644 tests/integration_tests/test_docking.py create mode 100644 tests/integration_tests/test_fep_plus.py create mode 100644 tests/integration_tests/test_gromacs.py create mode 100644 tests/integration_tests/test_rmsd_iter.py create mode 100644 tests/io/__init__.py create mode 100644 tests/io/test_data_manipulation.py create mode 
100644 tests/io/test_embedder.py create mode 100644 tests/io/test_initialize_compound.py create mode 100644 tests/panther/__init__.py create mode 100644 tests/panther/test_panther.py create mode 100644 tests/pmx/__init__.py create mode 100644 tests/pmx/test_abfe.py create mode 100644 tests/pmx/test_analyse.py create mode 100644 tests/pmx/test_assemble_systems.py create mode 100644 tests/pmx/test_atomMapping.py create mode 100644 tests/pmx/test_box_water_ions.py create mode 100644 tests/pmx/test_doublebox.py create mode 100644 tests/pmx/test_genlib.py create mode 100644 tests/pmx/test_gentop.py create mode 100644 tests/pmx/test_ligandHybrid.py create mode 100644 tests/pmx/test_mutate.py create mode 100644 tests/pmx/test_prepare_simulations.py create mode 100644 tests/pmx/test_prepare_transitions.py create mode 100644 tests/pmx/test_run_simulations.py create mode 100644 tests/pmx/test_setup_workpath.py create mode 100644 tests/prediction/__init__.py create mode 100644 tests/prediction/test_active_learning.py create mode 100644 tests/prediction/test_model_building.py create mode 100644 tests/prediction/test_predictor.py create mode 100644 tests/rms_filter/__init__.py create mode 100644 tests/rms_filter/test_rmsfilter.py create mode 100644 tests/rmsd/__init__.py create mode 100644 tests/rmsd/test_rmsd.py create mode 100644 tests/schrodinger/__init__.py create mode 100644 tests/schrodinger/test_desmond_production.py create mode 100644 tests/schrodinger/test_desmond_setup.py create mode 100644 tests/schrodinger/test_fep_analysis.py create mode 100644 tests/schrodinger/test_fep_plus_execution.py create mode 100644 tests/schrodinger/test_fep_plus_setup.py create mode 100644 tests/schrodinger/test_glide.py create mode 100644 tests/schrodinger/test_ligprep.py create mode 100644 tests/schrodinger/test_macromodel.py create mode 100644 tests/schrodinger/test_prepwizard.py create mode 100644 tests/schrodinger/test_prime.py create mode 100644 tests/shaep/__init__.py create mode 100644 tests/shaep/test_shaep.py create mode 100644 tests/step_utils/__init__.py create mode 100644 tests/step_utils/test_input_merger.py create mode 100644 tests/step_utils/test_input_preparator.py create mode 100644 tests/step_utils/test_run_variables_resolver.py create mode 100644 tests/step_utils/test_structconvert.py create mode 100644 tests/step_utils/test_writeout.py create mode 100644 tests/structure_prediction/__init__.py create mode 100644 tests/structure_prediction/test_dssp.py create mode 100644 tests/structure_prediction/test_pdb_fixer.py create mode 100644 tests/structure_prediction/test_peptide_embedder.py create mode 100644 tests/tests_paths.py create mode 100644 unit_tests.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3c4e837 --- /dev/null +++ b/.gitignore @@ -0,0 +1,16 @@ +__pycache__ +*.pyc +package.json +.vscode +tags +.idea + +timer.dat +git-commands.txt +icolos/config/unit_tests_config/config.json + +tests/junk +*.log +.directory +*_cache +*.ipynb_checkpoints diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..c1f3f38 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,68 @@ +# Update log + +### Version 1.4.0 | 2022-01-19 +#### Features +- Added support for non-equilibrium relative binding free energy calculation with PMX. +- Added Glide support for feeding in "in" files from Maestro directly. +- Added AutoDock Vina as docking backend. + +#### Internal +- Limited refactoring of support functions. 
+
+### Version 1.3.0 | 2021-11-18
+#### Features
+- Added Iterator mechanism for parallel step execution.
+- Pose rescoring by RMSD workflow.
+- MMGBSA workflow with GROMACS.
+
+#### Internal
+- Improved error logging from subprocesses.
+- Improvements to MDpocket workflows.
+- Refactored example workflows + added new examples.
+
+### Version 1.2.0 | 2021-09-15
+#### Features
+- Added MDpocket workflow for pocket identification.
+- Expanded scope of GROMACS workflow for improved ligand/cofactor parametrisation.
+- Improved FEP+ workflow map construction and analysis.
+- Performance optimisation for Turbomole and Prime.
+- Added PDBFixer step.
+- Added ensemble docking.
+
+#### Internal
+- Improved temporary file handling.
+
+### Version 1.1.0 | 2021-06-30
+#### Features
+- Added `Ligprep` workflow step.
+- Added `Glide` workflow step.
+- Added run-time global variables.
+- Added JSON input type (`REINVENT`-compatible).
+- Additional `GROMACS` binaries and automated ligand parametrisation.
+- Added support for Schrodinger's `FEP+` workflow.
+- Added support for `OptunaAZ` model building.
+
+#### Bug fixes
+- Fixed problems in tabular write-out (no compound names and sometimes lost column order).
+- Fixed bug in aggregation (`highest_is_best` parameter was not working properly).
+- Fixed instability with step write-out (occurred when no conformers were associated with a compound).
+- Fixed bug in the parallelization of `Ligprep`.
+
+#### Internal
+- Refactored structure for `Schrodinger` binaries.
+- Reworked the write-out functionality.
+- Reworked internal file handling.
+- Reworked generic data handling.
+
+### Version 1.0.0 | 2021-05-21
+#### Features
+- Basic functionality (data handling, backend wrapping).
+- Various steps implemented (`Turbomole`, `Cosmo`, `OMEGA`, `GROMACS`, ...).
+
+#### Bug fixes
+- Fixed issues with `Turbomole` execution.
+- Enforced GROMACS execution in `tmp_dir`.
+
+#### Internal
+- Adapted `pydantic` interface.
+
diff --git a/DOCKERFILE b/DOCKERFILE
new file mode 100644
index 0000000..3971749
--- /dev/null
+++ b/DOCKERFILE
@@ -0,0 +1,7 @@
+#syntax=docker/dockerfile:1
+
+FROM continuumio/miniconda3
+
+COPY environment_min.yml .
+
+RUN conda env create -f environment_min.yml
diff --git a/LICENCE b/LICENCE new file mode 100644 index 0000000..1f120b7 --- /dev/null +++ b/LICENCE @@ -0,0 +1,169 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + 1. Definitions. + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files.
+ "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + END OF TERMS AND CONDITIONS + APPENDIX: How to apply the Apache License to your work. + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + Copyright 2022 Molecular AI, AstraZeneca + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3fb6af0
--- /dev/null
+++ b/README.md
@@ -0,0 +1,75 @@
+[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/python/black)
+
+
+# `Icolos`: Workflow manager
+
+The `Icolos` tool is a workflow manager that aims to separate execution logic from the actual implementation as much as
+possible. Workflows are specified in `JSON` files (see folder `examples`), linking steps together. A diverse set of
+tools and internal steps is currently wrapped, including QM and MD software.
+
+
+## Introduction
+`Icolos` provides a single, unified interface to a host of software for common computational chemistry calculations, with built-in parallelization
+and straightforward extensibility to add additional functionality.
+It was principally developed to handle structural calculations for `REINVENT` jobs; however, various workflows have also been used as stand-alone pipelines.
+
+Workflows are constructed from elementary 'steps': individual blocks that specify a sequential list of operations (normally corresponding to a single program being executed).
+Command-line options are controlled through the step settings, while options controlling other aspects of the step's behaviour go in the `additional` block
+(a trimmed skeleton is sketched at the end of this README).
+
+For many use cases, one of the template workflows might suit your needs as-is, or require only a few tweaks to do what you want. The JSONs in the `examples` folder are updated less regularly
+with new features and are mostly used for integration testing.
+
+## Initial configuration
+You are welcome to clone the repository and use a local version; in particular, if you would like to experiment with the code base and/or contribute features, please get
+in contact with us.
+
+## Installation
+After cloning, first install the `icolosprod` `conda` environment:
+```
+conda env create -f environment_min.yml
+```
+
+## Execution
+Once a `JSON` is specified, the workflow can be executed like so:
+
+```
+conda activate icolosprod
+python executor.py -conf workflow.json
+```
+
+## `SLURM` Execution
+Once specified, a workflow can be called like this in a `bash` script:
+
+```
+#!/bin/bash -l
+#SBATCH -N 1
+#SBATCH -t 0-02:59:00
+#SBATCH -p core
+#SBATCH --ntasks-per-node=5
+#SBATCH --mem-per-cpu=2G
+
+source //miniconda3/bin/activate //miniconda3/envs/icolosprod
+python //Icolos/executor.py -conf workflow.json
+```
+For GROMACS workflows requiring the GPU partition, you will need to adapt the header accordingly, e.g. like so:
+
+```
+#!/bin/bash
+#SBATCH -J gmx_cco1_fold_microsecond
+#SBATCH -o MygpuJob_out_%j.txt
+#SBATCH -e MygpuJob_err_%j.txt
+#SBATCH --nodes=1
+#SBATCH --ntasks=4
+#SBATCH --cpus-per-task=4
+#SBATCH --gres=gpu:4
+#SBATCH --gres-flags=enforce-binding
+#SBATCH --mem-per-cpu=4g
+#SBATCH -p gpu
+#SBATCH --time=370:00:00
+
+```
+
+## Developers
+- Christian Margreitter
+- J. Harry Moore
+- Matthias R. Bauer
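For orientation, here is a trimmed skeleton of a workflow `JSON`, assembled from the example files under `examples/workflow`. The values shown are illustrative placeholders, and step-specific blocks (e.g. the Vina `search_space`) are omitted for brevity; see the full example files for runnable versions:

```json
{
  "workflow": {
    "header": {
      "workflow_id": "minimal_docking_example",
      "description": "Illustrative skeleton: embed one SMILES with Ligprep, then dock with AutoDock Vina.",
      "environment": { "export": [] },
      "global_variables": {
        "smiles": "aspirin:O=C(C)Oc1ccccc1C(=O)O",
        "receptor_path": "{entrypoint_dir}/receptor.pdbqt"
      }
    },
    "steps": [
      {
        "step_id": "Ligprep",
        "type": "ligprep",
        "execution": { "prefix_execution": "module load schrodinger/2020-4" },
        "settings": {
          "arguments": { "flags": ["-epik"], "parameters": { "-ph": 7.0 } },
          "additional": {}
        },
        "input": { "compounds": [{ "source": "{smiles}", "source_type": "string" }] }
      },
      {
        "step_id": "ADV",
        "type": "vina_docking",
        "execution": { "prefix_execution": "module load AutoDock_Vina" },
        "settings": {
          "arguments": { "flags": [], "parameters": {} },
          "additional": {
            "configuration": { "receptor_path": "{receptor_path}", "number_poses": 2 }
          }
        },
        "input": { "compounds": [{ "source": "Ligprep", "source_type": "step" }] },
        "writeout": [
          {
            "compounds": { "category": "conformers" },
            "destination": { "resource": "{entrypoint_dir}/docked.sdf", "type": "file", "format": "SDF" }
          }
        ]
      }
    ]
  }
}
```

Placeholders in curly braces (e.g. `{entrypoint_dir}`, `{receptor_path}`) are global variables: they can be declared in the workflow header or injected at run time via `executor.py`'s `--global_variables` flag, as done in the scripts under `examples/hpc_script`.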
diff --git a/environment_min.yml b/environment_min.yml new file mode 100644 index 0000000..8929b20 --- /dev/null +++ b/environment_min.yml @@ -0,0 +1,24 @@ +name: icolosprod +channels: + - psi4 + - conda-forge + - defaults + - rdkit +dependencies: + - biopython>=1.79 + - ipython + - pip + - python>=3.9 + - scikit-learn>=1.0.1 + - modal>=0.4 + - psi4>=1.4 + - pdbfixer + - pydantic>=1.8 + - pyvis + - requests + - openbabel>=3 + - rdkit>=2021.09.2 + - pip: + - black + - regex + - peptidebuilder>=1.1 diff --git a/examples/hpc_script/NIBR_reinvent.sh b/examples/hpc_script/NIBR_reinvent.sh new file mode 100644 index 0000000..a9be63e --- /dev/null +++ b/examples/hpc_script/NIBR_reinvent.sh @@ -0,0 +1,11 @@ +#!/bin/bash -l +#SBATCH -N 1 +#SBATCH -t 0-02:59:00 +#SBATCH -p core +#SBATCH --ntasks-per-node=5 +#SBATCH --mem-per-cpu=2G + +source /projects/cc/mai/miniconda3/bin/activate /projects/cc/mai/miniconda3/envs/Icolos +python /projects/cc/mai/IcolosDev/executor.py -conf /projects/cc/mai/material/Icolos/templates/NIBR/12-06-21nibr.json -debug \ + --global_variables "entrypoint_dir:/icolos, input_path_json:{entrypoint_dir}/tests/data/reinvent/small_input.json, output_path_json:{entrypoint_dir}/tests/junk/nibr_reinvent.json" + diff --git a/examples/hpc_script/SLURM_script.sh b/examples/hpc_script/SLURM_script.sh new file mode 100644 index 0000000..a7fa700 --- /dev/null +++ b/examples/hpc_script/SLURM_script.sh @@ -0,0 +1,11 @@ +#!/bin/bash -l +#SBATCH -N 1 +#SBATCH -t 0-02:59:00 +#SBATCH -p core +#SBATCH --ntasks-per-node=5 +#SBATCH --mem-per-cpu=2G + +source /projects/cc/mai/miniconda3/bin/activate /projects/cc/mai/miniconda3/envs/Icolos +python /projects/cc/mai/Icolos/executor.py -conf /projects/cc/mai/examples/Icolos/MPI_test/workflow_ReSCoSS.json \ + --global_variables "output_dir:/icolos/tests/junk" -debug + diff --git a/examples/workflow/desmond_md.json b/examples/workflow/desmond_md.json new file mode 100644 index 0000000..a9e417f --- /dev/null +++ b/examples/workflow/desmond_md.json @@ -0,0 +1,72 @@ +{ + "workflow": { + "header": { + "workflow_id": "desmond md", + "description": "Desmond simulation.", + "environment": { + "export": [ + ] + }, + "global_variables": { + "output_dir": "{entrypoint_dir}/tests/junk/desmond" + } + }, + "steps": [{ + "step_id": "desmond_md", + "type": "desmond", + "execution": { + "prefix_execution": "module load schrodinger/2021-1-js-aws" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-HOST": "localhost" + } + }, + "additional": { + "setup_msj_fields": { + + }, + "msj_fields": { + + }, + "cfg_fields": { + "time": 0.01 + } + } + }, + "input": { + "generic": [{ + "source": "{entrypoint_dir}/../IcolosData/molecules/1UYD/1UYD_apo.pdb", + "extension": "pdb" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "cms" + }, + "destination": { + "resource": "{output_dir}", + "type": "file", + "format": "txt", + "mode": "automatic" + } + },{ + "generic": { + "key": "dir" + }, + "destination": { + "resource": "{output_dir}", + "type": "file", + "format": "txt", + "mode": "dir" + } + } + ] + } + ] + } +} diff --git a/examples/workflow/docking/active_learning_docking.json b/examples/workflow/docking/active_learning_docking.json new file mode 100644 index 0000000..361fd36 --- /dev/null +++ b/examples/workflow/docking/active_learning_docking.json @@ -0,0 +1,68 @@ +{ + "workflow": { + "header": { + "workflow_id": "Active_learning_docking", + "description": "Bayesian optimisation scheme with Glide docking oracle.", + "environment": { + "export":
[ + + ] + }, + "global_variables": { + } + }, + "steps": [{ + "step_id": "active_learning", + "type": "active_learning", + "settings": { + "additional": { + "running_mode": "active_learning", + "virtual_lib": "/lib.sdf", + "validation_lib": "/val.sdf", + "activity_threshold": -7, + "n_rounds": 10, + "init_samples": 256, + "batch_size" : 128, + "criteria": "r_i_docking_score", + "oracle_config": { + "step_id": "Glide_oracle", + "type": "glide", + "execution": { + "prefix_execution": "module load schrodinger/2021-2-js-aws", + "parallelization": { + "cores": 32, + "max_length_sublists": 4 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-HOST": "cpu-only" + } + }, + "additional": { + "configuration": { + "AMIDE_MODE": "trans", + "EXPANDED_SAMPLING": "True", + "GRIDFILE": [".zip"], + "NENHANCED_SAMPLING": "1", + "POSE_OUTTYPE": "ligandlib_sd", + "POSES_PER_LIG": "1", + "POSTDOCK_NPOSE": "25", + "POSTDOCKSTRAIN": "True", + "PRECISION": "SP", + "REWARD_INTRA_HBONDS": "True" + } + } + } + } + } + } + } + ] + } +} \ No newline at end of file diff --git a/examples/workflow/docking/adv_docking.json b/examples/workflow/docking/adv_docking.json new file mode 100644 index 0000000..36a7987 --- /dev/null +++ b/examples/workflow/docking/adv_docking.json @@ -0,0 +1,120 @@ +{ + "workflow": { + "header": { + "workflow_id": "AutoDock Vina docking", + "description": "Runs docking using AutoDock Vina and a predefined receptor file.", + "environment": { + "export": [ + ] + }, + "global_variables": { + "smiles": "another_mol:Nc1ccc(cc1N)C(F)(F)F;failure:CXXC;aspirin:O=C(C)Oc1ccccc1C(=O)O", + "receptor_path": "{entrypoint_dir}/../IcolosData/AutoDockVina/1UYD_fixed.pdbqt" + } + }, + "steps": [{ + "step_id": "Ligprep", + "type": "ligprep", + "execution": { + "prefix_execution": "module load schrodinger/2020-4", + "parallelization": { + "cores": 2, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": ["-epik"], + "parameters": { + "-ph": 7.0, + "-pht": 2.0, + "-s": 10, + "-bff": 14 + } + }, + "additional": { + } + }, + "input": { + "compounds": [{ + "source": "{smiles}", + "source_type": "string" + } + ] + } + }, { + "step_id": "ADV", + "type": "vina_docking", + "execution": { + "prefix_execution": "module load AutoDock_Vina", + "parallelization": { + "cores": 4 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + } + }, + "additional": { + "configuration": { + "seed": 42, + "receptor_path": "{receptor_path}", + "number_poses": 2, + "search_space": { + "--center_x": 3.3, + "--center_y": 11.5, + "--center_z": 24.8, + "--size_x": 15, + "--size_y": 10, + "--size_z": 10 + } + }, + "grid_ids": ["1UYD"] + } + }, + "input": { + "compounds": [{ + "source": "Ligprep", + "source_type": "step" + } + ] + }, + "writeout": [ + { + "compounds": { + "category": "conformers" + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/adv_docked_conformers.sdf", + "type": "file", + "format": "SDF" + } + }, + { + "compounds": { + "category": "conformers", + "selected_tags": ["docking_score", "grid_id"], + "aggregation": { + "mode": "best_per_compound", + "key": "docking_score" + } + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/adv_docked_conformers.csv", + "type": "file", + "format": "CSV" + } + } + ] + } + ] + } +} \ No newline at end of file diff --git 
a/examples/workflow/docking/adv_target_preparation.json b/examples/workflow/docking/adv_target_preparation.json new file mode 100644 index 0000000..b66d8ab --- /dev/null +++ b/examples/workflow/docking/adv_target_preparation.json @@ -0,0 +1,44 @@ +{ + "workflow": { + "header": { + "workflow_id": "AutoDock Vina target preparation", + "description": "Runs target preparation for AutoDock Vina and generates a PDBQT receptor file.", + "environment": { + "export": [{ + "key": "OE_LICENSE", + "value": "/opt/scp/software/oelicense/1.0/oe_license.seq1" + } + + ] + }, + "global_variables": { + "receptor_input_apo_path": "{entrypoint_dir}/../IcolosData/molecules/1UYD/1UYD_apo.pdb", + "reference_ligand_path": "{entrypoint_dir}/../IcolosData/molecules/1UYD/PU8_native_ligand.pdb", + "receptor_output_path": "{entrypoint_dir}/tests/junk/1UYD_fixed.pdbqt" + } + }, + "steps": [{ + "step_id": "ADV_target_preparation", + "type": "vina_target_preparation", + "execution": { + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + } + }, + "additional": { + "pH": 7.4, + "input_receptor_pdb": "{receptor_input_apo_path}", + "output_receptor_pdbqt": "{receptor_output_path}", + "extract_box": { + "reference_ligand_path": "{reference_ligand_path}", + "reference_ligand_format": "PDB" + } + } + } + } + ] + } +} diff --git a/examples/workflow/docking/glide_docking.json b/examples/workflow/docking/glide_docking.json new file mode 100644 index 0000000..228db6c --- /dev/null +++ b/examples/workflow/docking/glide_docking.json @@ -0,0 +1,122 @@ +{ + "workflow": { + "header": { + "workflow_id": "Docking with Glide", + "description": "Docking a few compounds with Glide after Ligprep embedding", + "environment": { + "export": [ + ] + }, + "global_variables": { + "smiles": "another_mol:Nc1ccc(cc1N)C(F)(F)F;failure:CXXC;aspirin:O=C(C)Oc1ccccc1C(=O)O" + } + }, + "steps": [{ + "step_id": "Ligprep", + "type": "ligprep", + "execution": { + "prefix_execution": "module load schrodinger/2020-4", + "parallelization": { + "cores": 2, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": ["-epik"], + "parameters": { + "-ph": 7.0, + "-pht": 2.0, + "-s": 10, + "-bff": 14 + } + }, + "additional": { + } + }, + "input": { + "compounds": [{ + "source": "{smiles}", + "source_type": "string" + } + ] + } + }, { + "step_id": "Glide", + "type": "glide", + "execution": { + "prefix_execution": "module load schrodinger/2021-1-js-aws", + "parallelization": { + "cores": 4, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-HOST": "cpu-only" + } + }, + "additional": { + "configuration": { + "AMIDE_MODE": "trans", + "EXPANDED_SAMPLING": "True", + "GRIDFILE": ["{entrypoint_dir}/../IcolosData/Glide/1UYD_grid_constraints.zip"], + "NENHANCED_SAMPLING": "1", + "POSE_OUTTYPE": "ligandlib_sd", + "POSES_PER_LIG": "3", + "POSTDOCK_NPOSE": "25", + "POSTDOCKSTRAIN": "True", + "PRECISION": "SP", + "REWARD_INTRA_HBONDS": "True" + }, + "maestro_in_file": { + "path": "{entrypoint_dir}/../IcolosData/Glide/example.in" + } + } + }, + "input": { + "compounds": [{ + "source": "Ligprep", + "source_type": "step" + } + ] + }, + "writeout": [ + { + "compounds": { + "category": "conformers" + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/docked_conformers.sdf", + "type": "file", + "format": "SDF" + } + }, + { + "compounds": { + "category": "conformers", + "selected_tags": 
["docking_score", "grid_id"], + "aggregation": { + "mode": "best_per_compound", + "key": "docking_score" + } + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/docked_conformers.csv", + "type": "file", + "format": "CSV" + } + } + ] + } + ] + } +} \ No newline at end of file diff --git a/examples/workflow/fep_plus/fep_plus_aws.json b/examples/workflow/fep_plus/fep_plus_aws.json new file mode 100644 index 0000000..6e08345 --- /dev/null +++ b/examples/workflow/fep_plus/fep_plus_aws.json @@ -0,0 +1,171 @@ +{ + "workflow": { + "header": { + "workflow_id": "Docking/FEP+ combined workflow", + "description": "Test setup for FEP+ integration being run in the cloud (AWS).", + "environment": { + "export": [ + ] + }, + "global_variables": { + "smiles": "4-[4-(4-chlorophenyl)-2-methyl-1,3-thiazol-5-yl]benzenesulfonamide:Cc1nc(-c2ccc(Cl)cc2)c(-c2ccc(S(N)(=O)=O)cc2)s1;N-methyl-N-(4-methylphenyl)-4-(4-methylsulfonylphenyl)-6-(trifluoromethyl)pyrimidin-2-amine:Cc1ccc(N(C)c2nc(-c3ccc(S(C)(=O)=O)cc3)cc(C(F)(F)F)n2)cc1" + } + }, + "steps": [{ + "step_id": "initialization_smile", + "type": "initialization", + "input": { + "compounds": [{ + "source": "{smiles}", + "source_type": "string" + } + ] + } + }, { + "step_id": "Ligprep", + "type": "ligprep", + "execution": { + "prefix_execution": "module load schrodinger/2020-4", + "parallelization": { + "cores": 2, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": ["-epik"], + "parameters": { + "-ph": 7.0, + "-pht": 2.0, + "-s": 10, + "-bff": 14, + "-HOST": "localhost" + } + }, + "additional": { + "filter_file": { + "Total_charge": "!= 0" + } + } + }, + "input": { + "compounds": [{ + "source": "initialization_smile", + "source_type": "step" + } + ] + } + }, + { + "step_id": "Glide", + "type": "glide", + "execution": { + "prefix_execution": "module load schrodinger/2021-1-js-aws", + "parallelization": { + "cores": 4, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-HOST": "cpu-only" + } + }, + "additional": { + "configuration": { + "AMIDE_MODE": "trans", + "EXPANDED_SAMPLING": "True", + "GRIDFILE": ["{entrypoint_dir}/../IcolosData_junk/molecules/1CX2/1cx2_GridGen.zip"], + "NENHANCED_SAMPLING": "1", + "POSE_OUTTYPE": "poseviewer", + "POSES_PER_LIG": "1", + "POSTDOCK_NPOSE": "25", + "POSTDOCKSTRAIN": "True", + "PRECISION": "SP", + "REWARD_INTRA_HBONDS": "True" + } + } + }, + "input": { + "compounds": [{ + "source": "Ligprep", + "source_type": "step" + } + ] + } + },{ + "step_id": "FEP_plus_setup", + "type": "fep_plus_setup", + "execution": { + "prefix_execution": "module load schrodinger/2021-2-js-aws" + }, + "settings": { + + }, + "input": { + "compounds": [{ + "source": "Glide", + "source_type": "step", + "target_field": "compounds" + + }] + } + },{ + "step_id": "FEP_plus_exec", + "type": "fep_plus_exec", + "execution": { + "prefix_execution": "module load schrodinger/2021-2-js-aws && $SCHRODINGER/jsc download-start" + }, + "token_guard": { + "prefix_execution": "module load schrodinger/2021-2-js-aws", + "binary_location": "ssh /opt/schrodinger/suite/installations/default", + "token_pools": { + "FEP_GPGPU": 16 + }, + "wait_interval_seconds": 30, + "wait_limit_seconds": 0 + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-JOBNAME": "test", + "-HOST": "fep-compute" + } + } + }, + "input": { + "compounds": [{ + "source": "Glide", + "source_type": "step", + 
"target_field": "compounds" + }], + "generic": [{ + "source": "FEP_plus_setup", + "extension": "fmp" + }] + }, + "writeout": [{ + "compounds": { + "category": "conformers", + "selected_tags": ["dG", "docking_score"] + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/fe_plus_resultsv", + "type": "file", + "format": "CSV" + } + }] + } + + ] + } +} diff --git a/examples/workflow/gromacs/gromacs_ensemble_mmgbsa.json b/examples/workflow/gromacs/gromacs_ensemble_mmgbsa.json new file mode 100644 index 0000000..7b04bf8 --- /dev/null +++ b/examples/workflow/gromacs/gromacs_ensemble_mmgbsa.json @@ -0,0 +1,619 @@ +{ + "workflow": { + "header": { + "workflow_id": "gromacs_ensemble_mmgbsa", + "description": "ensemble MMGBSA demonstration - step iteration + SLURM job control", + "environment": { + "export": [ + { + "key": "GMX_GPU_DD_COMMS", + "value": "true" + }, + { + "key": "GMX_GPU_PME_PP_COMMS", + "value": "true" + }, + { + "key": "GMX_FORCE_UPDATE_DEFAULT_GPU", + "value": "true" + }, + { + "key": "ACPYPE", + "value": "${ACPYPE}/acpype" + } + ] + }, + "global_variables": { + "file_base": "{entrypoint_dir}/../IcolosData/gromacs/protein", + "output_dir": "{entrypoint_dir}/tests/junk/gromacs" + } + }, + "steps": [ + { + "step_id": "01_pdb2gmx", + "type": "pdb2gmx", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [ + "-ignh" + ], + "parameters": { + "-water": "tip3p", + "-ff": "amber03" + } + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "{file_base}/1BVG.pdb", + "extension": "pdb" + } + ] + } + }, + { + "step_id": "02_editconf", + "type": "editconf", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [ + "-c" + ], + "parameters": { + "-d": "1.2", + "-bt": "dodecahedron" + } + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "01_pdb2gmx", + "extension": "gro" + } + ] + } + }, + { + "step_id": "03_solvate", + "type": "solvate", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-cs": "spc216.gro" + } + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "02_editconf", + "extension": "gro" + }, + { + "source": "01_pdb2gmx", + "extension": "top" + } + ] + } + }, + { + "step_id": "04_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "03_solvate", + "extension": "gro" + }, + { + "source": "{file_base}/ions.mdp", + "extension": "mdp" + }, + { + "source": "03_solvate", + "extension": "top" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "05_genion", + "type": "genion", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [ + "-neutral" + ], + "parameters": { + "-pname": "NA", + "-nname": "CL" + } + }, + "additional": { + "pipe_input": "SOL" + } + }, + "input": { + "generic": [ + { + "source": "04_grompp", + "extension": "tpr" + }, + { + "source": "04_grompp", + "extension": "top" + }, + { + "source": 
"04_grompp", + "extension": "itp" + } + ] + } + }, + { + "step_id": "gromacs_iterator", + "type": "iterator", + "base_config": [ + { + "step_id": "06_grompp_eminim", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "05_genion", + "extension": "gro" + }, + { + "source": "{file_base}/minim.mdp", + "extension": "mdp" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "05_genion", + "extension": "itp" + } + ] + } + }, + { + "step_id": "07_eminim_mdrun", + "type": "mdrun", + "execution": { + "resource": "slurm", + "resources": { + "partition": "gpu", + "gres": "gpu:1", + "modules": [ + "GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + ] + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "06_grompp_eminim", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "08_nvt_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "make_ndx_command": "auto", + "-r": true + } + }, + "input": { + "generic": [ + { + "source": "07_eminim_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/nvt_equil.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "09_nvt_mdrun", + "type": "mdrun", + "execution": { + "resource": "slurm", + "resources": { + "partition": "gpu", + "gres": "gpu:1", + "modules": [ + "GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + ] + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "08_nvt_grompp", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "10_npt_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "make_ndx_command": "auto", + "-r": true + } + }, + "input": { + "generic": [ + { + "source": "09_nvt_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/npt_equil.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + }, + { + "source": "08_nvt_grompp", + "extension": "ndx" + } + ] + } + }, + { + "step_id": "11_npt_mdrun", + "type": "mdrun", + "execution": { + "resource": "slurm", + "resources": { + "partition": "gpu", + "gres": "gpu:1", + "modules": [ + "GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + ] + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "10_npt_grompp", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "12_prod_md_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + 
"fields": { + "nsteps": "4000000" + }, + "make_ndx_command": "auto", + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "11_npt_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/md.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "13_prod_mdrun", + "type": "mdrun", + "execution": { + "resource": "slurm", + "resources": { + "partition": "gpu", + "gres": "gpu:1", + "modules": [ + "GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + ] + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-nb": "gpu", + "-bonded": "gpu", + "-pme": "gpu" + } + } + }, + "input": { + "generic": [ + { + "source": "12_prod_md_grompp", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "14_trjconv", + "type": "trjconv", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [ + "-center" + ], + "parameters": { + "-pbc": "mol", + "-n": "index.ndx" + } + }, + "additional": { + "pipe_input": "Protein_Other System" + } + }, + "input": { + "generic": [ + { + "source": "13_prod_mdrun", + "extension": "xtc" + }, + { + "source": "12_prod_md_grompp", + "extension": "tpr" + }, + { + "source": "12_prod_md_grompp", + "extension": "ndx" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "xtc" + }, + "destination": { + "resource": "{output_dir}/md_0_1_trjconv.xtc", + "type": "file", + "format": "TXT" + } + } + ] + }, + { + "step_id": "14b_trjconv", + "type": "trjconv", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-fit": "rot+trans", + "-n": "index.ndx" + } + }, + "additional": { + "pipe_input": "Protein_Other System" + } + }, + "input": { + "generic": [ + { + "source": "14_trjconv", + "extension": "xtc" + }, + { + "source": "12_prod_md_grompp", + "extension": "tpr" + }, + { + "source": "12_prod_md_grompp", + "extension": "ndx" + } + ] + } + }, + { + "step_id": "15_gmx_MMPBSA", + "type": "gmx_mmpbsa", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2 && module load gmx_MMPBSA/1.3.3-fosscuda-2019a-Python-3.7.2" + }, + "settings": { + "arguments": { + "parameters": {} + }, + "additional": { + "coupling groups": "Protein Other", + "forcefield": "/amber14sb.ff" + } + }, + "input": { + "generic": [ + { + "source": "14b_trjconv", + "extension": "xtc" + }, + { + "source": "13_prod_mdrun", + "extension": "tpr" + }, + { + "source": "13_prod_mdrun", + "extension": "gro" + }, + { + "source": "12_prod_md_grompp", + "extension": "top" + }, + { + "source": "12_prod_md_grompp", + "extension": "itp" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "dat" + }, + "destination": { + "resource": "{output_dir}/FINAL_RESULTS_MMPBSA.dat", + "type": "file", + "format": "TXT" + } + } + ] + } + ], + "iter_settings": { + "iter_mode": "n_iters", + "n_iters": 25, + "parallelizer_settings": { + "parallelize": true, + "dependent_steps": 11, + "cores": 14 + } + } + } + ] + } +} \ No newline at end of file diff --git a/examples/workflow/gromacs/gromacs_fpocket.json b/examples/workflow/gromacs/gromacs_fpocket.json new file mode 100644 index 0000000..9642647 --- /dev/null +++ b/examples/workflow/gromacs/gromacs_fpocket.json @@ -0,0 +1,656 @@ +{ + "workflow": { + "header": { + 
"workflow_id": "gromacs_md_fpocket", + "description": "full md run with gromacs, with MDpocket run to extract descriptors for binding pockets", + "environment": { + "export": [ + { + "key": "GMX_GPU_DD_COMMS", + "value": "true" + }, + { + "key": "GMX_GPU_PME_PP_COMMS", + "value": "true" + }, + { + "key": "GMX_FORCE_UPDATE_DEFAULT_GPU", + "value": "true" + }, + { + "key": "ACPYPE", + "value": "${ACPYPE}/acpype" + } + ] + }, + "global_variables": { + "file_base": "{entrypoint_dir}/../IcolosData/gromacs/protein", + "output_dir": "{entrypoint_dir}/tests/junk/gromacs" + } + }, + "steps": [ + { + "step_id": "01_pdb2gmx", + "type": "pdb2gmx", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [ + "-ignh" + ], + "parameters": { + "-water": "tip3p", + "-ff": "amber03" + } + }, + "additional": { + "removeres": [ + "DMP" + ] + } + }, + "input": { + "generic": [ + { + "source": "{file_base}/1BVG.pdb", + "extension": "pdb" + } + ] + } + }, + { + "step_id": "02_editconf", + "type": "editconf", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [ + "-c" + ], + "parameters": { + "-d": "1.5", + "-bt": "dodecahedron" + } + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "01_pdb2gmx", + "extension": "gro" + } + ] + } + }, + { + "step_id": "03_solvate", + "type": "solvate", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-cs": "spc216.gro" + } + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "02_editconf", + "extension": "gro" + }, + { + "source": "01_pdb2gmx", + "extension": "top" + } + ] + } + }, + { + "step_id": "04_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "03_solvate", + "extension": "gro" + }, + { + "source": "{file_base}/ions.mdp", + "extension": "mdp" + }, + { + "source": "03_solvate", + "extension": "top" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "05_genion", + "type": "genion", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [ + "-neutral" + ], + "parameters": { + "-pname": "NA", + "-nname": "CL" + } + }, + "additional": { + "pipe_input": "SOL" + } + }, + "input": { + "generic": [ + { + "source": "04_grompp", + "extension": "tpr" + }, + { + "source": "04_grompp", + "extension": "top" + } + ] + } + }, + { + "step_id": "06_grompp_eminim", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "05_genion", + "extension": "gro" + }, + { + "source": "{file_base}/minim.mdp", + "extension": "mdp" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "07_eminim_mdrun", + "type": "mdrun", + "execution": 
{ + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + } + }, + "input": { + "generic": [ + { + "source": "06_grompp_eminim", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "08_nvt_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "-r": true, + "make_ndx_command": "auto" + } + }, + "input": { + "generic": [ + { + "source": "07_eminim_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/nvt_equil.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "09_nvt_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "08_nvt_grompp", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "10_npt_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "-r": true, + "make_ndx_command": "auto" + } + }, + "input": { + "generic": [ + { + "source": "09_nvt_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/npt_equil.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + }, + { + "source": "08_nvt_grompp", + "extension": "ndx" + } + ] + } + }, + { + "step_id": "11_npt_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "10_npt_grompp", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "12_prod_md_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "-r": false, + "fields": { + "nsteps": "500" + }, + "make_ndx_command": "auto" + } + }, + "input": { + "generic": [ + { + "source": "11_npt_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/md.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + }, + { + "source": "08_nvt_grompp", + "extension": "ndx" + } + ] + } + }, + { + "step_id": "13_prod_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-nb": "gpu", + "-bonded": "gpu", + "-pme": "gpu", + "-c": "structure.pdb" + } + } + }, + "input": { + "generic": [ + { + "source": "12_prod_md_grompp", + "extension": "tpr" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "xtc" + }, + "destination": { + "resource": "{output_dir}/md_0_1.xtc", + "type": 
"file", + "format": "txt" + } + }, + { + "generic": { + "key": "log" + }, + "destination": { + "resource": "{output_dir}/md_0_1.log", + "type": "file", + "format": "txt" + } + }, + { + "generic": { + "key": "pdb" + }, + "destination": { + "resource": "{output_dir}/md_0_1.pdb", + "type": "file", + "format": "txt" + } + }, + { + "generic": { + "key": "tpr" + }, + "destination": { + "resource": "{output_dir}/md_0_1.tpr", + "type": "file", + "format": "txt" + } + } + ] + }, + { + "step_id": "14_trjconv", + "type": "trjconv", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [ + "-center" + ], + "parameters": { + "-pbc": "nojump" + } + }, + "additional": { + "pipe_input": "Protein Protein" + } + }, + "input": { + "generic": [ + { + "source": "13_prod_mdrun", + "extension": "xtc" + }, + { + "source": "13_prod_mdrun", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "15_trjconv", + "type": "trjconv", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-fit": "rot+trans" + } + }, + "additional": { + "pipe_input": "Protein Protein" + } + }, + "input": { + "generic": [ + { + "source": "14_trjconv", + "extension": "xtc" + }, + { + "source": "14_trjconv", + "extension": "tpr" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "xtc" + }, + "destination": { + "resource": "{output_dir}/md_0_1_trjconv.xtc", + "type": "file", + "format": "txt" + } + } + ] + }, + { + "step_id": "16_editconf", + "type": "editconf", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx", + "-o": "structure.pdb" + } + }, + "additional": { + "pipe_input": "Protein" + } + }, + "input": { + "generic": [ + { + "source": "12_prod_md_grompp", + "extension": "gro" + }, + { + "source": "12_prod_md_grompp", + "extension": "ndx" + } + ] + } + }, + { + "step_id": "17_MDpocket", + "type": "mdpocket", + "execution": { + "prefix_execution": "module load fpocket" + + }, + "settings": { + "arguments": { + "parameters": {} + }, + "additional": { + "format": "gromacs" + } + }, + "input": { + "generic": [ + { + "source": "15_trjconv", + "extension": "xtc" + }, + { + "source": "16_editconf", + "extension": "pdb" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "pdb" + }, + "destination": { + "resource": "{output_dir}", + "type": "file", + "format": "txt", + "mode": "dir" + } + }, + { + "generic": { + "key": "txt" + }, + "destination": { + "resource": "{output_dir}", + "type": "file", + "format": "txt", + "mode": "dir" + } + }, + { + "generic": { + "key": "dx" + }, + "destination": { + "resource": "{output_dir}", + "type": "file", + "format": "txt", + "mode": "dir" + } + } + ] + } + ] + } +} \ No newline at end of file diff --git a/examples/workflow/gromacs/gromacs_md.json b/examples/workflow/gromacs/gromacs_md.json new file mode 100644 index 0000000..17a2d40 --- /dev/null +++ b/examples/workflow/gromacs/gromacs_md.json @@ -0,0 +1,488 @@ +{ + "workflow": { + "header": { + "workflow_id": "gromacs_test", + "description": "full md run with gromacs", + "environment": { + "export": [{ + "key": "GMX_GPU_DD_COMMS", + "value": "true" + }, { + "key": "GMX_GPU_PME_PP_COMMS", + "value": "true" + }, { + "key": "GMX_FORCE_UPDATE_DEFAULT_GPU", + "value": "true" + }, { + 
"key": "ACPYPE", + "value": "${ACPYPE}/acpype" + } + ] + }, + "global_variables": { + "file_base": "{entrypoint_dir}/../IcolosData/gromacs/protein", + "output_dir": "{entrypoint_dir}/tests/junk/gromacs" + } + }, + "steps": [{ + "step_id": "01_pdb2gmx", + "type": "pdb2gmx", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + + }, + "settings": { + "arguments": { + "flags": ["-ignh"], + "parameters": { + "-water": "tip3p", + "-ff": "amber03" + } + }, + "additional": { + } + }, + "input": { + "generic": [ + { + "source": "{file_base}/1BVG.pdb", + "extension": "pdb" + } + ]} + },{ + "step_id": "02_editconf", + "type": "editconf", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": ["-c"], + "parameters": { + "-d": "1.0", + "-bt": "dodecahedron" + } + + }, + "additional": { + } + }, + "input": { + "generic": [ + { + "source": "01_pdb2gmx", + "extension": "gro" + } + + ]} + + },{ + "step_id": "03_solvate", + "type": "solvate", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-cs": "spc216" + } + }, + "additional": { + } + }, + "input": { + "generic": [ + { + "source": "02_editconf", + "extension": "gro" + },{ + "source": "01_pdb2gmx", + "extension": "top" + } + + ]} + + }, + { + "step_id": "04_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + } + }, + "additional": { + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "03_solvate", + "extension": "gro" + },{ + "source": "{file_base}/ions.mdp", + "extension": "mdp" + },{ + "source": "03_solvate", + "extension": "top" + },{ + "source": "01_pdb2gmx", + "extension": "itp" + } + ]} + + },{ + "step_id": "05_genion", + "type": "genion", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": ["-neutral"], + "parameters": { + "-pname": "NA", + "-nname": "CL" + } + }, + "additional": { + "pipe_input": "SOL" + } + }, + "input": { + "generic": [ + { + "source": "04_grompp", + "extension": "tpr" + },{ + "source": "04_grompp", + "extension": "top" + },{ + "source": "04_grompp", + "extension": "itp" + } + ]} + + },{ + "step_id": "06_grompp_eminim", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + + } + }, + "additional": { + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "05_genion", + "extension": "gro" + },{ + "source": "{file_base}/minim.mdp", + "extension": "mdp" + },{ + "source": "05_genion", + "extension": "top" + },{ + "source": "05_genion", + "extension": "itp" + } + + ]} + },{ + "step_id": "07_eminim_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + + } + }, + "additional": { + } + }, + "input": { + "generic": [ + { + "source": "06_grompp_eminim", + "extension": "tpr" + } + ]} + },{ + "step_id": "08_nvt_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + 
"make_ndx_command": "auto", + "-r": true + } + }, + "input": { + "generic": [ + { + "source": "07_eminim_mdrun", + "extension": "gro" + },{ + "source": "05_genion", + "extension": "top" + },{ + "source": "{file_base}/nvt_equil.mdp", + "extension": "mdp" + },{ + "source": "01_pdb2gmx", + "extension": "itp" + } + ]} + },{ + "step_id": "09_nvt_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + } + }, + "additional": { + } + }, + "input": { + "generic": [ + { + "source": "08_nvt_grompp", + "extension": "tpr" + } + ]} + },{ + "step_id": "10_npt_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "make_ndx_command": "auto", + "-r": true + } + }, + "input": { + "generic": [ + { + "source": "09_nvt_mdrun", + "extension": "gro" + },{ + "source": "05_genion", + "extension": "top" + },{ + "source": "{file_base}/npt_equil.mdp", + "extension": "mdp" + },{ + "source": "01_pdb2gmx", + "extension": "itp" + } + ]} + },{ + "step_id": "11_npt_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + } + }, + "additional": { + "make_ndx_command": "auto" + } + }, + "input": { + "generic": [ + { + "source": "10_npt_grompp", + "extension": "tpr" + } + ]} + + },{ + "step_id": "12_prod_md_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "fields": { + "nsteps": "50000" + }, + "make_ndx_command": "auto", + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "11_npt_mdrun", + "extension": "gro" + },{ + "source": "05_genion", + "extension": "top" + },{ + "source": "{file_base}/md.mdp", + "extension": "mdp" + },{ + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + },{ + "step_id": "13_prod_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-nb": "gpu", + "-bonded": "gpu" + } + } + }, + "input": { + "generic": [ + { + "source": "12_prod_md_grompp", + "extension": "tpr" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "xtc" + }, + "destination": { + "resource": "{output_dir}/md_0_1.xtc", + "type": "file", + "format": "TXT" + } + }, + { + "generic": { + "key": "log" + }, + "destination": { + "resource": "{output_dir}/md_0_1.log", + "type": "file", + "format": "TXT" + } + }, + { + "generic": { + "key": "gro" + }, + "destination": { + "resource": "{output_dir}/md_0_1.gro", + "type": "file", + "format": "TXT" + } + }, + { + "generic": { + "key": "tpr" + }, + "destination": { + "resource": "{output_dir}/md_0_1.tpr", + "type": "file", + "format": "TXT" + } + } + ] + },{ + "step_id": "14_trjconv", + "type": "trjconv", + "execution":{ + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings":{ + "arguments":{ + "flags":["-center"] + }, + "additional":{ + "pipe_input": "Protein System" + } + }, + "input":{ + "generic":[ + { + "source": "13_prod_mdrun", + "extension": "xtc" + }, + { + "source": "13_prod_mdrun", + "extension": "tpr" + } + 
] + }, + "writeout":[ + { + "generic": { + "key": "xtc" + }, + "destination": { + "resource": "{output_dir}/md_0_1_trjconv.xtc", + "type": "file", + "format": "TXT" + } + } + ] + } + ] + } +} \ No newline at end of file diff --git a/examples/workflow/gromacs/gromacs_mmgbsa.json b/examples/workflow/gromacs/gromacs_mmgbsa.json new file mode 100644 index 0000000..0ed2725 --- /dev/null +++ b/examples/workflow/gromacs/gromacs_mmgbsa.json @@ -0,0 +1,580 @@ +{ + "workflow": { + "header": { + "workflow_id": "gromacs_test", + "description": "full md run with gromacs with ligand parametrisation and mmgbsa calculation", + "environment": { + "export": [ + { + "key": "GMX_GPU_DD_COMMS", + "value": "true" + }, + { + "key": "GMX_GPU_PME_PP_COMMS", + "value": "true" + }, + { + "key": "GMX_FORCE_UPDATE_DEFAULT_GPU", + "value": "true" + }, + { + "key": "ACPYPE", + "value": "${ACPYPE}/acpype" + } + + + ] + }, + "global_variables": { + "file_base": "{entrypoint_dir}/../IcolosData/gromacs/protein", + "output_dir": "{entrypoint_dir}/tests/junk/gromacs" + + } + }, + "steps": [ + { + "step_id": "01_pdb2gmx", + "type": "pdb2gmx", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [ + "-ignh" + ], + "parameters": { + "-water": "tip3p", + "-ff": "amber14sb" + } + }, + "additional": { + } + }, + "input": { + "generic": [ + { + "source": "{file_base}/1BVG.pdb", + "extension": "pdb" + } + ] + } + }, + { + "step_id": "02_editconf", + "type": "editconf", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [ + "-c" + ], + "parameters": { + "-d": "1.0", + "-bt": "dodecahedron" + } + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "01_pdb2gmx", + "extension": "gro" + } + ] + } + }, + { + "step_id": "03_solvate", + "type": "solvate", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-cs": "spc216.gro" + } + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "02_editconf", + "extension": "gro" + }, + { + "source": "01_pdb2gmx", + "extension": "top" + } + ] + } + }, + { + "step_id": "04_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "03_solvate", + "extension": "gro" + }, + { + "source": "{file_base}/ions.mdp", + "extension": "mdp" + }, + { + "source": "03_solvate", + "extension": "top" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "05_genion", + "type": "genion", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [ + "-neutral" + ], + "parameters": { + "-pname": "NA", + "-nname": "CL" + } + }, + "additional": { + "pipe_input": "SOL" + } + }, + "input": { + "generic": [ + { + "source": "04_grompp", + "extension": "tpr" + }, + { + "source": "04_grompp", + "extension": "top" + }, + { + "source": "04_grompp", + "extension": "itp" + } + ] + } + }, + { + "step_id": "06_grompp_eminim", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + 
"-r": false + } + }, + "input": { + "generic": [ + { + "source": "05_genion", + "extension": "gro" + }, + { + "source": "{file_base}/minim.mdp", + "extension": "mdp" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "05_genion", + "extension": "itp" + } + ] + } + }, + { + "step_id": "07_eminim_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "06_grompp_eminim", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "08_nvt_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "make_ndx_command": "auto", + "-r": true + } + }, + "input": { + "generic": [ + { + "source": "07_eminim_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/nvt_equil.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "09_nvt_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "08_nvt_grompp", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "10_npt_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "make_ndx_command": "auto", + "-r": true + } + }, + "input": { + "generic": [ + { + "source": "09_nvt_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/npt_equil.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + }, + { + "source": "08_nvt_grompp", + "extension": "ndx" + } + ] + } + }, + { + "step_id": "11_npt_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "10_npt_grompp", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "12_prod_md_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "fields": { + "nsteps": "50000" + }, + "make_ndx_command": "auto", + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "11_npt_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/md.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "13_prod_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-nb": "gpu", + "-bonded": "gpu", + "-pme": "gpu" + } + } + }, + "input": { + "generic": [ + { + "source": "12_prod_md_grompp", + "extension": "tpr" + } + ] + }, + "writeout": [ + { 
+ "generic": { + "key": "xtc" + }, + "destination": { + "resource": "{output_dir}/md_0_1.xtc", + "type": "file", + "format": "TXT" + } + }, + { + "generic": { + "key": "log" + }, + "destination": { + "resource": "{output_dir}/md_0_1.log", + "type": "file", + "format": "TXT" + } + }, + { + "generic": { + "key": "gro" + }, + "destination": { + "resource": "{output_dir}/md_0_1.gro", + "type": "file", + "format": "TXT" + } + }, + { + "generic": { + "key": "tpr" + }, + "destination": { + "resource": "{output_dir}/md_0_1.tpr", + "type": "file", + "format": "TXT" + } + } + ] + }, + { + "step_id": "14_trjconv", + "type": "trjconv", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [ + "-center" + ] + }, + "additional": { + "pipe_input": "System System" + } + }, + "input": { + "generic": [ + { + "source": "13_prod_mdrun", + "extension": "xtc" + }, + { + "source": "13_prod_mdrun", + "extension": "tpr" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "xtc" + }, + "destination": { + "resource": "{output_dir}/md_0_1_trjconv.xtc", + "type": "file", + "format": "TXT" + } + } + ] + }, + { + "step_id": "15_gmx_MMPBSA", + "type": "gmx_mmpbsa", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2 && module load gmx_MMPBSA/1.3.3-fosscuda-2019a-Python-3.7.2" + }, + "settings": { + "arguments": { + "parameters": { + } + }, + "additional": { + "coupling_groups": "Protein Other", + "forcefield": "/amber14sb.ff" + } + }, + "input": { + "generic": [ + { + "source": "14_trjconv", + "extension": "xtc" + }, + { + "source": "13_prod_mdrun", + "extension": "tpr" + }, + { + "source": "13_prod_mdrun", + "extension": "gro" + }, + { + "source": "12_prod_md_grompp", + "extension": "top" + }, + { + "source": "12_prod_md_grompp", + "extension": "itp" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "dat" + }, + "destination": { + "resource": "{output_dir}/FINAL_RESULTS_MMPBSA.pdb", + "type": "file", + "format": "TXT" + } + } + ] + } + ] + } +} \ No newline at end of file diff --git a/examples/workflow/input_file_types.json b/examples/workflow/input_file_types.json new file mode 100644 index 0000000..476e7e9 --- /dev/null +++ b/examples/workflow/input_file_types.json @@ -0,0 +1,91 @@ +{ + "workflow": { + "header": { + "id": "input_file_types_example", + "description": "This configuration illustrates the use of different input file types.", + "logging": { + "logfile": "tests/junk/input_file_types.log" + }, + "environment": { + "export": [{ + "key": "XTBHOME", + "value": "/opt/scp/services/reinvent/Icolos/binaries/xtb-6.3.2" + }, { + "key": "XTBPATH", + "value": "${XTBHOME}/share/xtb" + }, { + "key": "PATH", + "value": "${PATH}:${XTBHOME}/bin" + }, { + "key": "PKG_CONFIG_PATH", + "value": "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig" + } + ] + } + }, + "steps": [{ + "step_id": "01_initialization_from_SDF", + "type": "initialization", + "input": { + "compounds": [ + { + "source": "{entrypoint_dir}/../IcolosData/molecules/aspirin.sdf", + "source_type": "file", + "format": "SDF" + } + ] + } + }, + { + "step_id": "01_initialization_SMI", + "type": "initialization", + "input": { + "compounds": [ + { + "source": "{entrypoint_dir}/../IcolosData/molecules/paracetamol.smi", + "source_type": "file", + "format": "SMI" + } + ] + } + }, + { + "step_id": "01_initialization_CSV", + "type": "initialization", + "input": { + "compounds": [ + { + "source": 
"{entrypoint_dir}/../IcolosData/molecules/small_molecules.csv", + "columns": { + "smiles": "SMILES", + "names": "name" + }, + "source_type": "file", + "format": "CSV" + } + ] + } + }, + { + "step_id": "02_embed_SMI", + "type": "embedding", + "settings": { + "arguments": { + "flags": [], + "parameters": { + "method": "rdkit" + } + } + }, + "input": { + "compounds": [ + { + "source": "01_initialization_SMI", + "source_type": "step" + } + ] + } + } + ] + } +} diff --git a/examples/workflow/pmx/pmx_rbfe.json b/examples/workflow/pmx/pmx_rbfe.json new file mode 100644 index 0000000..cbfad71 --- /dev/null +++ b/examples/workflow/pmx/pmx_rbfe.json @@ -0,0 +1,376 @@ +{ + "workflow": { + "header": { + "workflow_id": "Full PMX workflow - rbfe", + "description": "PMX full map calculation with parallel GPU sims.", + "environment": { + "export": [ + { + "key": "GMX_GPU_DD_COMMS", + "value": "true" + }, + { + "key": "GMX_GPU_PME_PP_COMMS", + "value": "true" + }, + { + "key": "GMX_FORCE_UPDATE_DEFAULT_GPU", + "value": "true" + }, + { + "key": "ACPYPE", + "value": "${ACPYPE}/acpype" + }, + { + "key": "PMX_PYTHON", + "value": "${CONDA}/envs/pmx/bin/python" + }, + { + "key": "PMX", + "value": "${CONDA}/envs/pmx/bin/pmx" + }, + { + "key": "GMXLIB", + "value": "${PMX}/src/pmx/data/mutff" + } + ] + }, + "global_variables": { + "file_path": "{entrypoint_dir}/../IcolosData/pmx", + "output_dir": "{work_dir}/tests/junk/pmx" + }, + "global_settings": { + "single_directory": true, + "remove_temporary_files": false + } + }, + "steps": [ + { + "step_id": "fep_setup", + "type": "fep_plus_setup", + "execution": { + "prefix_execution": "module load schrodinger/2021-2-js-aws" + }, + "settings": { + "arguments": { + "parameters": { + "-num-procs": 24 + } + } + }, + "input": { + "compounds": [ + { + "source": "{file_path}/compounds.sdf", + "source_type": "file", + "format": "SDF" + } + ], + "generic": [ + { + "source": "{file_path}/receptor.pdb", + "extension": "pdb" + } + ] + } + }, + { + "step_id": "01_pmx_setup", + "type": "pmx_setup", + "execution": { + "parallelization": { + "cores": 24 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "water": "tip3p", + "forcefield": "amber99sb-star-ildn-mut", + "replicas": 1 + } + }, + "input": { + "generic": [ + { + "source": "fep_setup", + "extension": "log" + }, + { + "source": "{file_path}/receptor.pdb", + "extension": "pdb" + }, + { + "source": "{file_path}/mdppath/", + "extension": "mdp" + } + ], + "compounds": [ + { + "source": "fep_setup", + "source_type": "step" + } + ], + "work_dir": "{output_dir}" + } + }, + { + "step_id": "02_pmx_atomMapping", + "type": "pmx_atomMapping", + "execution": { + "parallelization": { + "cores": 24 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "water": "tip3p", + "forcefield": "amber99sb-star-ildn-mut" + } + }, + "input": { + "perturbation_map": [ + { + "source": "01_pmx_setup" + } + ] + } + }, + { + "step_id": "03_pmx_ligandHybrid", + "type": "pmx_ligandHybrid", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-cs": "spc216.gro" + } + }, + "additional": {} + }, + "input": { + "perturbation_map": [ + { + "source": "02_pmx_atomMapping" + } + ] + } + }, + { + "step_id": "04_assemble_systems", + "type": "pmx_assemble_systems", + "execution": { + "prefix_execution": "module load 
GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + "binary_location": "$PMX_PYTHON /src/pmx/scripts/icolos_entrypoints/", + "parallelization": { + "cores": 24 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "edges": [] + } + }, + "input": { + "perturbation_map": [ + { + "source": "03_pmx_ligandHybrid" + } + ] + } + }, + { + "step_id": "05_box_water_ions", + "type": "pmx_box_water_ions", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + "parallelization": { + "cores": 24 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "perturbation_map": [ + { + "source": "04_assemble_systems" + } + ] + } + }, + { + "step_id": "06_prepare_simulations", + "type": "pmx_prepare_simulations", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + "parallelization": { + "cores": 24 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "sim_type": "em" + } + } + }, + { + "step_id": "06b_run_simulations", + "type": "pmx_run_simulations", + "execution": { + "resource": "slurm", + "resources": { + "partition": "gpu", + "gres": "gpu:1", + "modules": [ + "GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + ] + }, + "parallelization": { + "cores": 16 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "sim_type": "em" + } + } + }, + { + "step_id": "07_prepare_simulations", + "type": "pmx_prepare_simulations", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + "parallelization": { + "cores": 24 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "sim_type": "eq" + } + } + }, + { + "step_id": "07b_run_simulations", + "type": "pmx_run_simulations", + "execution": { + "resource": "slurm", + "resources": { + "partition": "gpu", + "gres": "gpu:1", + "modules": [ + "GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + ] + }, + "parallelization": { + "cores": 16 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "sim_type": "eq" + } + } + }, + { + "step_id": "08_prep_transitions", + "type": "pmx_prepare_transitions", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + "parallelization": { + "cores": 24 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "sim_type": "transitions" + } + } + }, + { + "step_id": "09_run_transitions", + "type": "pmx_run_simulations", + "execution": { + "resource": "slurm", + "resources": { + "partition": "gpu", + "gres": "gpu:1", + "modules": [ + "GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + ] + }, + "parallelization": { + "cores": 16 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "sim_type": "transitions" + } + }, + "input": { + "generic": [] + } + }, + { + "step_id": "pmx_analyse", + "type": "pmx_run_analysis", + "execution": { + "parallelization": { + "cores": 24 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + } + } + ] + } +} diff --git a/examples/workflow/qm/ePSA_permeability.json b/examples/workflow/qm/ePSA_permeability.json new file mode 100644 index 0000000..6ba4bd8 
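The pmx workflow above offloads its mdrun-heavy steps through the "resource": "slurm" execution blocks, while the orchestrating Icolos process can itself be submitted as a batch job in the style of examples/hpc_script/SLURM_script.sh. A sketch, with all paths illustrative and work_dir passed as an ordinary global variable (an assumption; any {key} referenced by the config can be supplied this way):

    #!/bin/bash -l
    #SBATCH -N 1
    #SBATCH -t 2-00:00:00
    #SBATCH -p core

    source /path/to/miniconda3/bin/activate icolosprod
    python executor.py -conf examples/workflow/pmx/pmx_rbfe.json -debug \
        --global_variables "entrypoint_dir:$(pwd), work_dir:$(pwd)"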
--- /dev/null +++ b/examples/workflow/qm/ePSA_permeability.json @@ -0,0 +1,334 @@ +{ + "workflow": { + "header": { + "id": "ePSA_permeability_example", + "description": "A shortened, simplified version of the ReSCoSS workflow to calculate descriptors for molecules to predict ePSA and permeability values.", + "environment": { + "export": [{ + "key": "XTBHOME", + "value": "/projects/cc/mai/binaries/xtb-6.4.0" + }, { + "key": "XTBPATH", + "value": "${XTBHOME}/share/xtb" + }, { + "key": "PATH", + "value": "${PATH}:${XTBHOME}/bin" + }, { + "key": "PKG_CONFIG_PATH", + "value": "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig" + } + ] + }, + "global_variables": { + "ePSA_model_path": "/projects/cc/mai/material/Icolos/models/ePSA/2020-11-13_DFTB_RF_final.pkl", + "permeability_model_path": "/projects/cc/mai/material/Icolos/models/permeability/2021-03-26_DFTB_permeability_RF_final.pkl" + } + }, + "steps": [ { + "step_id": "initialization", + "type": "initialization", + "input": { + "compounds": [{ + "source": "{entrypoint_dir}/../IcolosData/molecules/paracetamol.sdf", + "source_type": "file", + "format": "SDF" + } + ] + } + }, { + "step_id": "omega_confgen", + "type": "omega", + "execution": { + "prefix_execution": "module load omega" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-maxconfs": 200, + "-rms": 0.8, + "-canonOrder": "false" + } + } + }, + "input": { + "compounds": [{ + "source": "initialization", + "source_type": "step" + } + ] + } + }, { + "step_id": "conf_macromodel", + "type": "macromodel", + "execution": { + "prefix_execution": "module load schrodinger/2019-4" + }, + "token_guard": { + "prefix_execution": "module load schrodinger/2019-4", + "token_pools": { + "MMOD_MACROMODEL": 2 + }, + "wait_interval_seconds": 30, + "wait_limit_seconds": 0 + }, + "settings": { + "arguments": { + "flags": ["-WAIT"], + "parameters": { + "-NJOBS": 1 + } + } + }, + "input": { + "compounds": [{ + "source": "initialization", + "source_type": "step" + } + ] + } + }, { + "step_id": "RMSfiltering1", + "type": "rmsfilter", + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "method": "alignmol", + "threshold": 1 + } + }, + "input": { + "compounds": [{ + "source": "omega_confgen", + "source_type": "step" + }, { + "source": "conf_macromodel", + "source_type": "step" + } + ], + "merge": { + "compounds": true, + "merge_compounds_by": "name", + "enumerations": true, + "merge_enumerations_by": "id" + } + } + }, { + "step_id": "conf_optXTB", + "type": "xtb", + "execution": { + "binary_location": "/projects/cc/mai/binaries/xtb-6.4.0", + "parallelization": { + "cores": 10 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "--opt": "vtight", + "--gbsa": "h2o" + } + } + }, + "input": { + "compounds": [{ + "source": "RMSfiltering1", + "source_type": "step" + } + ] + } + }, { + "step_id": "RMSfiltering2", + "type": "rmsfilter", + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "method": "alignmol", + "threshold": 1 + } + }, + "input": { + "compounds": [{ + "source": "conf_optXTB", + "source_type": "step" + } + ] + } + }, { + "step_id": "turbomole", + "type": "turbomole", + "execution": { + "prefix_execution": "module load turbomole/73", + "failure_policy": { + "n_tries": 5 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "tm_config_dir": "/projects/cc/mai/material/Icolos/turbomole_config", + "tm_config_basename": 
"b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", + "tm_config_cosmo": "/projects/cc/mai/material/Icolos/turbomole_config/cosmoprep_eps80.tm", + "execution_mode": "ridft" + } + }, + "input": { + "compounds": [{ + "source": "RMSfiltering2", + "source_type": "step" + } + ] + } + }, { + "step_id": "cosmo", + "type": "cosmo", + "execution": { + "prefix_execution": "module load COSMOtherm/20.0.0" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + } + }, + "input": { + "compounds": [{ + "source": "turbomole", + "source_type": "step" + } + ] + } + }, { + "step_id": "boltzmann_weighting", + "type": "boltzmann_weighting", + "settings": { + "arguments": { + "flags": [], + "parameters": { + "properties": [{ + "input": "G_h2o", + "output": "boltzfactor_wat" + }, { + "input": "G_meoh", + "output": "boltzfactor_meoh" + }, { + "input": "G_octanol", + "output": "boltzfactor_octanol" + }, { + "input": "G_dmso", + "output": "boltzfactor_dmso" + }, { + "input": "G_cychex", + "output": "boltzfactor_cychex" + }, { + "input": "G_chcl3", + "output": "boltzfactor_chcl3" + }, { + "input": "G_acn", + "output": "boltzfactor_acn" + }, { + "input": "G_thf", + "output": "boltzfactor_thf" + } + ], + "weight": { + "input": ["area", "HB_acc", "volume", "HB_don", "sigma2", "sigma4", "Gsolv_meoh", "Gsolv_h2o", "Gsolv_cychex", "volume"], + "output_prefix": "bf_weighted", + "properties": ["boltzfactor_dmso", "boltzfactor_wat", + "boltzfactor_meoh", "boltzfactor_cychex"] + } + } + } + }, + "input": { + "compounds": [{ + "source": "cosmo", + "source_type": "step" + } + ] + } + }, { + "step_id": "ePSA_prediction", + "type": "prediction", + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "model_path": "{ePSA_model_path}", + "features": ["bf_weighted_volume_boltzfactor_dmso", "bf_weighted_area_boltzfactor_dmso", + "bf_weighted_HB_acc_boltzfactor_dmso", "bf_weighted_HB_don_boltzfactor_dmso", + "bf_weighted_sigma2_boltzfactor_dmso", "bf_weighted_Gsolv_meoh_boltzfactor_dmso"], + "name_predicted": "pred_ePSA" + } + }, + "input": { + "compounds": [{ + "source": "boltzmann_weighting", + "source_type": "step" + } + ] + } + }, { + "step_id": "permeability_prediction", + "type": "prediction", + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "model_path": "{permeability_model_path}", + "features": ["bf_weighted_volume_boltzfactor_wat", "bf_weighted_sigma4_boltzfactor_wat", "bf_weighted_Gsolv_h2o_boltzfactor_wat", + "bf_weighted_HB_don_boltzfactor_wat", "bf_weighted_HB_acc_boltzfactor_wat", "bf_weighted_Gsolv_meoh_boltzfactor_meoh", + "bf_weighted_Gsolv_cychex_boltzfactor_cychex"], + "name_predicted": "pred_permeability" + } + }, + "input": { + "compounds": [{ + "source": "ePSA_prediction", + "source_type": "step" + } + ] + }, + "writeout": [{ + "compounds": { + "category": "conformers" + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/ePSA_permeability_final.sdf", + "type": "file", + "format": "SDF" + } + }, { + "compounds": { + "category": "conformers", + "selected_tags": ["pred_ePSA", "pred_permeability"], + "aggregation": { + "mode": "best_per_compound", + "key": "pred_ePSA" + } + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/ePSA.csv", + "type": "file", + "format": "CSV" + } + } + ] + } + ] + } +} diff --git a/examples/workflow/qm/full_rescoss.json b/examples/workflow/qm/full_rescoss.json new file mode 100644 index 0000000..0e4a157 --- /dev/null +++ b/examples/workflow/qm/full_rescoss.json @@ -0,0 
+1,464 @@ +{ + "workflow": { + "header": { + "id": "rescoss", + "description": "Full ReSCoSS configuration (version 1.0).", + "environment": { + "export": [{ + "key": "XTBHOME", + "value": "/projects/cc/mai/binaries/xtb-6.4.0" + }, { + "key": "XTBPATH", + "value": "${XTBHOME}/share/xtb" + }, { + "key": "PATH", + "value": "${PATH}:${XTBHOME}/bin" + }, { + "key": "PKG_CONFIG_PATH", + "value": "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig" + }, { + "key": "PARA_ARCH", + "value": "MPI" + }, { + "key": "PARNODES", + "value": "6" + } + ] + }, + "global_variables": { + } + }, + "steps": [{ + "step_id": "initialization", + "type": "initialization", + "input": { + "compounds": [ + { + "source": "{entrypoint_dir}/../IcolosData/molecules/aspirin.sdf", + "source_type": "file", + "format": "SDF" + } + ] + } + }, { + "step_id": "omega_confgen", + "type": "omega", + "execution": { + "prefix_execution": "module load omega" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-maxconfs": 200, + "-rms": 0.8, + "-canonOrder": "false" + } + } + }, + "input": { + "compounds": [ + { + "source": "initialization", + "source_type": "step" + } + ] + } + }, { + "step_id": "conf_macromodel", + "type": "macromodel", + "execution": { + "prefix_execution": "module load schrodinger/2019-4" + }, + "token_guard": { + "prefix_execution": "module load schrodinger/2019-4", + "token_pools": { + "MMOD_MACROMODEL": 2 + }, + "wait_interval_seconds": 30, + "wait_limit_seconds": 0 + }, + "settings": { + "arguments": { + "flags": ["-WAIT"], + "parameters": { + "-NJOBS": 1 + } + } + }, + "input": { + "compounds": [ + { + "source": "initialization", + "source_type": "step" + } + ] + } + }, { + "step_id": "RMSfiltering1", + "type": "rmsfilter", + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "method": "alignmol", + "threshold": 1 + } + }, + "input": { + "compounds": [ + { + "source": "omega_confgen", + "source_type": "step" + }, + { + "source": "conf_macromodel", + "source_type": "step" + } + ], + "merge": { + "compounds": true, + "merge_compounds_by": "name", + "enumerations": true, + "merge_enumerations_by": "id" + } + } + }, { + "step_id": "conf_optXTB", + "type": "xtb", + "execution": { + "binary_location": "/projects/cc/mai/binaries/xtb-6.4.0", + "parallelization": { + "cores": 10 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "--opt": "vtight", + "--gbsa": "h2o" + } + } + }, + "input": { + "compounds": [ + { + "source": "RMSfiltering1", + "source_type": "step" + } + ] + } + }, { + "step_id": "RMSfiltering2", + "type": "rmsfilter", + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "method": "alignmol", + "threshold": 1 + } + }, + "input": { + "compounds": [ + { + "source": "conf_optXTB", + "source_type": "step" + } + ] + } + }, { + "step_id": "turbomole", + "type": "turbomole", + "execution": { + "prefix_execution": "module load turbomole/73", + "failure_policy": { + "n_tries": 5 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + } + }, + "additional": { + "tm_config_dir": "/projects/cc/mai/material/Icolos/turbomole_config", + "tm_config_basename": "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", + "tm_config_cosmo": "/projects/cc/mai/material/Icolos/turbomole_config/cosmoprep_eps80.tm", + "execution_mode": "ridft" + } + }, + "input": { + "compounds": [ + { + "source": "RMSfiltering2", + "source_type": "step" + } + ] + } + }, { + "step_id": "cosmo", + "type": "cosmo", + 
"execution": { + "prefix_execution": "module load COSMOtherm/20.0.0" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + } + } + }, + "input": { + "compounds": [ + { + "source": "turbomole", + "source_type": "step" + } + ] + } + }, { + "step_id": "boltzmann_weighting", + "type": "boltzmann_weighting", + "settings": { + "arguments": { + "flags": [], + "parameters": { + "properties": [ + { + "input": "G_h2o", + "output": "boltzfactor_wat" + }, + { + "input": "G_meoh", + "output": "boltzfactor_meoh" + }, + { + "input": "G_octanol", + "output": "boltzfactor_octanol" + }, + { + "input": "G_dmso", + "output": "boltzfactor_dmso" + }, + { + "input": "G_cychex", + "output": "boltzfactor_cychex" + }, + { + "input": "G_chcl3", + "output": "boltzfactor_chcl3" + }, + { + "input": "G_acn", + "output": "boltzfactor_acn" + }, + { + "input": "G_thf", + "output": "boltzfactor_thf" + } + ], + "weight": { + "input": ["area", "HB_acc", "volume", "HB_don", "sigma2", "Gsolv_meoh", "dipole"], + "output_prefix": "bf_weighted", + "properties": ["boltzfactor_dmso", "boltzfactor_wat", + "boltzfactor_meoh", "boltzfactor_cychex"] + } + } + } + }, + "input": { + "compounds": [ + { + "source": "cosmo", + "source_type": "step" + } + ] + } + }, + { + "step_id": "clustering", + "type": "clustering", + "settings": + { + "arguments": + { + "flags": [], + "parameters": + { + "n_clusters": 3, + "max_iter": 300 + } + }, + "additional": + { + "top_n_per_solvent": 3, + "features": ["area", "dipole", "HB_acc", "HB_don"], + "free_energy_solvent_tags": ["G_h2o", "G_meoh", "G_octanol", + "G_dmso", "G_cychex", "G_acn", + "G_thf"] + } + }, + "input": { + "compounds": [ + { + "source": "boltzmann_weighting", + "source_type": "step" + } + ] + } + }, { + "step_id": "turbomole_opt", + "type": "turbomole", + "execution": { + "prefix_execution": "module load turbomole/73", + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-c": 150, + "-gcart": 3 + } + }, + "additional": { + "tm_config_dir": "/projects/cc/mai/material/Icolos/turbomole_config", + "tm_config_basename": "b97-3c-ri-d3-def2-mtzvp-int-charge", + "tm_config_cosmo": "/projects/cc/mai/material/Icolos/turbomole_config/cosmoprep_eps80.tm", + "execution_mode": "jobex" + } + }, + "input": { + "compounds": [ + { + "source": "clustering", + "source_type": "step" + } + ] + }, + "writeout": [{ + "compounds": { + "category": "extra_data", + "key": "cosmo_file" + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/[compound_name]/[conformer_id].cosmo", + "type": "file", + "format": "TXT" + } + }, { + "compounds": { + "category": "extra_data", + "key": "coord_file" + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/[compound_name]/[conformer_id].coord", + "type": "file", + "format": "TXT" + } + }] + }, { + "step_id": "cosmo_opt", + "type": "cosmo", + "execution": { + "prefix_execution": "module load COSMOtherm/20.0.0" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + } + } + }, + "input": { + "compounds": [ + { + "source": "turbomole_opt", + "source_type": "step" + } + ] + } + }, + { + "step_id": "boltzmann_weighting_opt", + "type": "boltzmann_weighting", + "settings": { + "arguments": { + "flags": [], + "parameters": { + "properties": [ + { + "input": "G_h2o", + "output": "boltzfactor_wat" + }, + { + "input": "G_meoh", + "output": "boltzfactor_meoh" + }, + { + "input": "G_octanol", + "output": "boltzfactor_octanol" + }, + { + "input": "G_dmso", + "output": 
"boltzfactor_dmso" + }, + { + "input": "G_cychex", + "output": "boltzfactor_cychex" + }, + { + "input": "G_chcl3", + "output": "boltzfactor_chcl3" + }, + { + "input": "G_acn", + "output": "boltzfactor_acn" + }, + { + "input": "G_thf", + "output": "boltzfactor_thf" + } + ], + "weight": { + "input": ["area", "HB_acc", "volume", "HB_don", "sigma2", "Gsolv_meoh", "dipole"], + "output_prefix": "bf_weighted", + "properties": ["boltzfactor_dmso", "boltzfactor_wat", + "boltzfactor_meoh", "boltzfactor_cychex"] + } + } + } + }, + "input": { + "compounds": [ + { + "source": "cosmo_opt", + "source_type": "step" + } + ] + }, + "writeout": [ + { + "compounds": { + "category": "conformers" + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/full_rescoss_reweighting_output_opt.sdf", + "type": "file", + "format": "SDF" + } + } + ] + } + ] + } +} \ No newline at end of file diff --git a/examples/workflow/reinvent/feature_counter.json b/examples/workflow/reinvent/feature_counter.json new file mode 100644 index 0000000..6ba23d8 --- /dev/null +++ b/examples/workflow/reinvent/feature_counter.json @@ -0,0 +1,71 @@ +{ + "workflow": { + "header": { + "workflow_id": "Feature counting", + "description": "Feature counting - number of rings.", + "environment": { + }, + "global_variables": { + "input_path_json": "{entrypoint_dir}/../IcolosData/reinvent/small_input.json", + "output_path_json": "{entrypoint_dir}/tests/junk/fc_rings_reinvent.json" + } + }, + "steps": [{ + "step_id": "embed_molecules", + "type": "embedding", + "settings": { + "arguments": { + "flags": [], + "parameters": { + "method": "rdkit" + } + }, + "additional": { + "embed_as": "conformers" + } + }, + "input": { + "compounds": [{ + "source": "{input_path_json}", + "source_type": "file", + "format": "JSON" + + } + ] + } + }, { + "step_id": "feature_count", + "type": "feature_counter", + "settings": { + "additional": { + "feature": "num_rings" + } + }, + "input": { + "compounds": [{ + "source": "embed_molecules", + "source_type": "step" + } + ] + }, + "writeout": [{ + "compounds": { + "category": "conformers", + "selected_tags": ["num_rings"], + "aggregation": { + "mode": "best_per_compound", + "key": "num_rings", + "highest_is_best": true + } + }, + "destination": { + "resource": "{output_path_json}", + "type": "file", + "format": "JSON" + } + } + ] + } + ] + } +} diff --git a/examples/workflow/reinvent/nibr_local_reinvent.json b/examples/workflow/reinvent/nibr_local_reinvent.json new file mode 100644 index 0000000..fad85f9 --- /dev/null +++ b/examples/workflow/reinvent/nibr_local_reinvent.json @@ -0,0 +1,159 @@ +{ + "workflow": { + "header": { + "workflow_id": "NIBR", + "description": "NIBR (local) workflow with returning results in REINVENT JSON format - no use of cloud computing such as AWS.", + "environment": { + "export": [{ + "key": "XTBHOME", + "value": "/projects/cc/mai/binaries/xtb-6.4.0" + }, { + "key": "XTBPATH", + "value": "${XTBHOME}/share/xtb" + }, { + "key": "PATH", + "value": "${PATH}:${XTBHOME}/bin" + }, { + "key": "PKG_CONFIG_PATH", + "value": "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig" + }, { + "key": "PARA_ARCH", + "value": "MPI" + }, { + "key": "PARNODES", + "value": "6" + } + + ] + }, + "global_variables": { + "smiles": "aspirin:O=C(C)Oc1ccccc1C(=O)O" + } + }, + "steps": [{ + "step_id": "Ligprep", + "type": "ligprep", + "execution": { + "prefix_execution": "module load schrodinger/2020-4", + "parallelization": { + "cores": 2, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + 
"settings": { + "arguments": { + "flags": ["-epik"], + "parameters": { + "-ph": 7.0, + "-pht": 2.0, + "-s": 10, + "-bff": 14 + } + }, + "additional": { + } + }, + "input": { + "compounds": [{ + "source": "{smiles}", + "source_type": "string" + } + ] + } + }, { + "step_id": "Glide", + "type": "glide", + "execution": { + "prefix_execution": "module load schrodinger/2021-1-js-aws", + "parallelization": { + "cores": 4, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-HOST": "cpu-only" + } + }, + "additional": { + "configuration": { + "AMIDE_MODE": "trans", + "EXPANDED_SAMPLING": "True", + "GRIDFILE": ["{entrypoint_dir}/../IcolosData/Glide/1UYD_grid_constraints.zip"], + "NENHANCED_SAMPLING": "1", + "POSE_OUTTYPE": "ligandlib_sd", + "POSES_PER_LIG": "3", + "POSTDOCK_NPOSE": "25", + "POSTDOCKSTRAIN": "True", + "PRECISION": "SP", + "REWARD_INTRA_HBONDS": "True" + }, + "maestro_in_file": { + "path": "{entrypoint_dir}/../IcolosData/Glide/example.in" + } + } + }, + "input": { + "compounds": [{ + "source": "Ligprep", + "source_type": "step" + } + ] + } + }, { + "step_id": "Shaep", + "type": "shaep", + "execution": { + "binary_location": "/projects/cc/mai/binaries" + }, + "input": { + "generic": [{ + "source": "{entrypoint_dir}/../IcolosData/panther/1uyd_negative_image.mol2", + "extension": "mol2" + }], + "compounds": [{ + "source": "Glide", + "target_field": "compounds", + "source_type": "step" + } + ] + }, + "writeout": [{ + "compounds": { + "category": "conformers", + "selected_tags": ["shape_similarity", "esp_similarity", "docking_score"] + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/nibr_reinvent/nibr_reinvent_all.csv", + "type": "file", + "format": "CSV" + } + }, { + "compounds": { + "category": "conformers", + "selected_tags": ["shape_similarity", "esp_similarity", "docking_score"], + "aggregation": { + "mode": "best_per_compound", + "key": "shape_similarity", + "highest_is_best": true + } + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/nibr_reinvent/nibr_reinvent.json", + "type": "file", + "format": "JSON" + } + } + ] + } + ] + } +} diff --git a/examples/workflow/rescoring/negative_image_panther.json b/examples/workflow/rescoring/negative_image_panther.json new file mode 100644 index 0000000..1c4bb3f --- /dev/null +++ b/examples/workflow/rescoring/negative_image_panther.json @@ -0,0 +1,58 @@ +{ + "workflow": { + "header": { + "workflow_id": "panther_test_example", + "description": "Panther setup to generate negative image.", + "environment": { + "export": [{ + "key": "XTBHOME", + "value": "/projects/cc/mai/binaries/xtb-6.4.0" + }, { + "key": "XTBPATH", + "value": "${XTBHOME}/share/xtb" + }, { + "key": "PATH", + "value": "${PATH}:${XTBHOME}/bin" + }, { + "key": "PKG_CONFIG_PATH", + "value": "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig" + }, { + "key": "PARA_ARCH", + "value": "MPI" + }, { + "key": "PARNODES", + "value": "6" + } + ] + }, + "global_variables": { + } + }, + "steps": [{ + "step_id": "negative_image_generation", + "type": "panther", + "settings": { + "additional": { + "panther_location": "/projects/cc/mai/binaries/panther", + "panther_config_file": "{entrypoint_dir}/../IcolosData/panther/default_panther.in", + "fields": { + "1-Pdb file": "{entrypoint_dir}/../IcolosData/panther/1UYD_holo_residue_X.pdb" + } + } + }, + "writeout": [ + { + "generic": { + "key": "mol2" + }, + "destination": { + "resource": 
"{entrypoint_dir}/tests/junk/panther/1uyd_panther_negative_image.mol2", + "type": "file", + "format": "txt" + } + } + ] + } + ] + } +} diff --git a/examples/workflow/rescoring/nibr_local.json b/examples/workflow/rescoring/nibr_local.json new file mode 100644 index 0000000..c0d12bf --- /dev/null +++ b/examples/workflow/rescoring/nibr_local.json @@ -0,0 +1,161 @@ +{ + "workflow": { + "header": { + "workflow_id": "NIBR", + "description": "NIBR (local) workflow - no use of cloud computing such as AWS.", + "environment": { + "export": [{ + "key": "XTBHOME", + "value": "/projects/cc/mai/binaries/xtb-6.4.0" + }, { + "key": "XTBPATH", + "value": "${XTBHOME}/share/xtb" + }, { + "key": "PATH", + "value": "${PATH}:${XTBHOME}/bin" + }, { + "key": "PKG_CONFIG_PATH", + "value": "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig" + }, { + "key": "PARA_ARCH", + "value": "MPI" + }, { + "key": "PARNODES", + "value": "6" + } + + ] + }, + "global_variables": { + "smiles": "aspirin:O=C(C)Oc1ccccc1C(=O)O" + } + }, + "steps": [{ + "step_id": "Ligprep", + "type": "ligprep", + "execution": { + "prefix_execution": "module load schrodinger/2020-4", + "parallelization": { + "cores": 2, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": ["-epik"], + "parameters": { + "-ph": 7.0, + "-pht": 2.0, + "-s": 10, + "-bff": 14 + } + }, + "additional": { + } + }, + "input": { + "compounds": [{ + "source": "{smiles}", + "source_type": "string" + } + ] + } + }, { + "step_id": "Glide", + "type": "glide", + "execution": { + "prefix_execution": "module load schrodinger/2021-1-js-aws", + "parallelization": { + "cores": 4, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-HOST": "cpu-only" + } + }, + "additional": { + "configuration": { + "AMIDE_MODE": "trans", + "EXPANDED_SAMPLING": "True", + "GRIDFILE": ["{entrypoint_dir}/../IcolosData/Glide/1UYD_grid_constraints.zip"], + "NENHANCED_SAMPLING": "1", + "POSE_OUTTYPE": "ligandlib_sd", + "POSES_PER_LIG": "3", + "POSTDOCK_NPOSE": "25", + "POSTDOCKSTRAIN": "True", + "PRECISION": "SP", + "REWARD_INTRA_HBONDS": "True" + }, + "maestro_in_file": { + "path": "{entrypoint_dir}/../IcolosData/Glide/example.in" + } + } + }, + "input": { + "compounds": [{ + "source": "Ligprep", + "source_type": "step" + } + ] + } + }, { + "step_id": "Shaep", + "type": "shaep", + "execution": { + "binary_location": "/projects/cc/mai/binaries" + }, + "input": { + "generic": [{ + "source": "{entrypoint_dir}/../IcolosData/panther/1uyd_negative_image.mol2", + "extension": "mol2" + }], + "compounds": [{ + "source": "Glide", + "target_field": "compounds", + "source_type": "step" + } + ] + }, + "writeout": [ + { + "compounds": { + "category": "conformers", + "selected_tags": ["shape_similarity", "esp_similarity"] + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/nibr_final_all.csv", + "type": "file", + "format": "CSV" + } + }, + { + "compounds": { + "category": "conformers", + "selected_tags": ["shape_similarity", "esp_similarity"], + "aggregation": { + "mode": "best_per_compound", + "key": "shape_similarity", + "highest_is_best": true + } + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/nibr_final_bestpercompound.csv", + "type": "file", + "format": "CSV" + } + } + ] + } + ] + } +} diff --git a/examples/workflow/rescoring/rmsd_rescoring.json b/examples/workflow/rescoring/rmsd_rescoring.json new file mode 100644 index 
0000000..1315064 --- /dev/null +++ b/examples/workflow/rescoring/rmsd_rescoring.json @@ -0,0 +1,223 @@ +{ + "workflow": { + "header": { + "workflow_id": "RMSD_rescoring", + "description": "Run RMSD rescoring on docking poses.", + "environment": { + "export": [{ + "key": "XTBHOME", + "value": "/projects/cc/mai/binaries/xtb-6.4.0" + }, { + "key": "XTBPATH", + "value": "${XTBHOME}/share/xtb" + }, { + "key": "PATH", + "value": "${PATH}:${XTBHOME}/bin" + }, { + "key": "PKG_CONFIG_PATH", + "value": "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig" + }, { + "key": "PARA_ARCH", + "value": "MPI" + }, { + "key": "PARNODES", + "value": "6" + } + + ] + }, + "global_variables": { + "smiles": "aspirin:O=C(C)Oc1ccccc1C(=O)O" + } + }, + "steps": [{ + "step_id": "Ligprep", + "type": "ligprep", + "execution": { + "prefix_execution": "module load schrodinger/2020-4", + "parallelization": { + "cores": 2, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": ["-epik"], + "parameters": { + "-ph": 7.0, + "-pht": 2.0, + "-s": 10, + "-bff": 14 + } + }, + "additional": {} + }, + "input": { + "compounds": [{ + "source": "{smiles}", + "source_type": "string" + } + ] + } + }, { + "step_id": "Glide", + "type": "glide", + "execution": { + "prefix_execution": "module load schrodinger/2021-1-js-aws", + "parallelization": { + "cores": 4, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-HOST": "cpu-only" + } + }, + "additional": { + "configuration": { + "AMIDE_MODE": "trans", + "EXPANDED_SAMPLING": "True", + "GRIDFILE": [ + "{entrypoint_dir}/../IcolosData/Glide/1UYD_grid_constraints.zip" + ], + "NENHANCED_SAMPLING": "1", + "POSE_OUTTYPE": "ligandlib_sd", + "POSES_PER_LIG": "3", + "POSTDOCK_NPOSE": "25", + "POSTDOCKSTRAIN": "True", + "PRECISION": "SP", + "REWARD_INTRA_HBONDS": "True" + } + } + }, + "input": { + "compounds": [{ + "source": "Ligprep", + "source_type": "step" + } + ] + } + }, { + "step_id": "compound_filter", + "type": "data_manipulation", + "settings": { + "additional": { + "action": "filter", + "filter_level": "compounds", + "criteria": "docking_score", + "return_n": 1, + "highest_is_best": false + } + }, + "input": { + "compounds": [{ + "source": "Glide", + "source_type": "step" + } + ] + } + }, { + "step_id": "xtb", + "type": "xtb", + "execution": { + "binary_location": "/projects/cc/mai/binaries/xtb-6.4.0", + "parallelization": { + "cores": 4 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "--opt": "vtight", + "--gbsa": "h2o" + } + } + }, + "input": { + "compounds": [{ + "source": "compound_filter", + "source_type": "step" + } + ] + }, + "writeout": [{ + "compounds": { + "category": "conformers" + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/rmsd_rescoring_xtb.sdf", + "type": "file", + "format": "SDF" + } + } + ] + }, { + "step_id": "data_manipulation", + "type": "data_manipulation", + "settings": { + "additional": { + "action": "attach_conformers_as_extra", + "source": "xtb" + } + }, + "input": { + "compounds": [{ + "source": "compound_filter", + "source_type": "step" + } + ] + } + }, { + "step_id": "rmsd", + "type": "rmsd", + "settings": { + "additional": { + "method": "alignmol" + } + }, + "input": { + "compounds": [{ + "source": "data_manipulation", + "source_type": "step" + } + ] + }, + "writeout": [{ + "compounds": { + "category": "conformers" + }, + "destination": { + "resource": 
"{entrypoint_dir}/tests/junk/rmsd_rescoring.sdf", + "type": "file", + "format": "SDF" + } + }, { + "compounds": { + "category": "conformers", + "selected_tags": ["docking_score", "rmsd", "grid_id"], + "aggregation": { + "mode": "best_per_compound", + "key": "docking_score" + } + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/rmsd_rescoring.csv", + "type": "file", + "format": "CSV" + } + } + ] + } + ] + } +} diff --git a/executor.py b/executor.py new file mode 100644 index 0000000..290ecae --- /dev/null +++ b/executor.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +# coding=utf-8 + +import os +import sys +import json +import argparse +from datetime import datetime +from icolos.core.composite_agents.workflow import WorkFlow + +from icolos.loggers.entrypoint_logger import EntryPointLogger + +from icolos.utils.enums.composite_agents_enums import WorkflowEnum +from icolos.utils.enums.logging_enums import LoggingConfigEnum +from icolos.utils.enums.entry_points import ExecutorEnum + +from icolos.utils.entry_point_functions.logging_helper_functions import ( + initialize_logging, +) +from icolos.utils.entry_point_functions.parsing_functions import parse_header +from icolos.utils.general.files_paths import attach_root_path + + +if __name__ == "__main__": + + # enums + _LE = LoggingConfigEnum() + _EE = ExecutorEnum() + _WE = WorkflowEnum() + + # initialize logger + logger = EntryPointLogger() + + # get the input parameters and parse them + parser = argparse.ArgumentParser( + description='Implements entry point for the "Icolos" workflow class.' + ) + parser.add_argument( + "-conf", + type=str, + default=None, + help="A path to an workflow's configuration file (JSON dictionary) that is to be executed.", + ) + parser.add_argument( + "-debug", + action="store_true", + help='Set this flag to activate the inbuilt debug logging mode (this will overwrite parameter "-log_conf", if set).', + ) + parser.add_argument( + "--global_variables", + nargs="+", + default=None, + type=str, + help='List of strings, setting global variables with key and value, e.g. "root:/path/to/root".', + ) + parser.add_argument( + "--global_settings", + nargs="+", + default=None, + type=str, + help='List of strings, setting global settings with key and value, e.g. "remove_temporary_files:False, single_directory:True".', + ) + args, args_unk = parser.parse_known_args() + + if args.conf is None or not os.path.isfile(args.conf): + raise Exception( + 'Parameter "-conf" must be a relative or absolute path to a configuration (JSON) file.' + ) + + # load configuration + with open(args.conf) as file: + conf = file.read().replace("\r", "").replace("\n", "") + conf = json.loads(conf) + + # set the logging configuration according to parameters + log_conf = attach_root_path(_LE.PATH_CONFIG_DEFAULT) + if args.debug: + log_conf = attach_root_path(_LE.PATH_CONFIG_DEBUG) + logger = initialize_logging(log_conf_path=log_conf, workflow_conf=conf) + + # update global variables and settings + conf = parse_header( + conf=conf, args=args, entry_point_path=os.path.realpath(__file__), logger=logger + ) + + # generate workflow object + workflow = WorkFlow(**conf[_WE.WORKFLOW]) + workflow.initialize() + + # execute the whole workflow + st_time = datetime.now() + workflow.execute() + exec_time = datetime.now() - st_time + logger.log(f"Icolos workflow completed. 
Walltime: {exec_time}.", _LE.INFO) + sys.exit(0) diff --git a/external_documentation/REINVENT_input.json b/external_documentation/REINVENT_input.json new file mode 100644 index 0000000..84b08ee --- /dev/null +++ b/external_documentation/REINVENT_input.json @@ -0,0 +1,4 @@ +{ + "names": ["0", "1", "3"], + "smiles": ["C#CCCCn1", "CCCCn1c", "CC(C)(C)CCC1(c2"] +} \ No newline at end of file diff --git a/external_documentation/REINVENT_result.json b/external_documentation/REINVENT_result.json new file mode 100644 index 0000000..86894dd --- /dev/null +++ b/external_documentation/REINVENT_result.json @@ -0,0 +1,9 @@ +{ + "results": [ + { + "values_key": "score", + "values": [1.0, 2.1, 3.2, "", "", 4.3, 7.0] + } + ], + "names": ["mol1", "mol2", "mol3", "mol4", "mol5", "mol6", "ref_mol"] +} \ No newline at end of file diff --git a/external_documentation/fep_mapper.txt b/external_documentation/fep_mapper.txt new file mode 100644 index 0000000..6636444 --- /dev/null +++ b/external_documentation/fep_mapper.txt @@ -0,0 +1,102 @@ +# Version 2020-4 + +Command line: $SCHRODINGER/run -FROM scisol fep_mapper.py -full-help +usage: fep_mapper.py [-h] [-v] [-full-help] [-o BASENAME] [-s BASENAME] [-x FILENAME] [-r RECEPTOR] [-e N] + [-rha RECEPTOR_HOTATOMS_ASL] [-ligand-hotatoms-rule-complex LIGAND_HOTATOMS_RULE_COMPLEX] + [-ligand-hotatoms-rule-solvent LIGAND_HOTATOMS_RULE_SOLVENT] [-t TOPOLOGY] [-b BIAS] [-add-bias ADD_BIAS] + [-C CUTOFF] [-simi-cutoff SIMI_CUTOFF] [-num-procs NUM_PROCS] [-align-core-only] [-rule RULE] [-simiscore] + [-force-optimize] [-connect-disconnected-nodes] [-generate-neutral-intermediates] [-clear-predicted-ddg] + [-m ATOM_MAPPING] [-strict-matching] [-extend-mapping] + [-mapping-scheme {position,position_and_bonding,protein_by_residue}] [-debug] [-mp METALLOPROTEIN] [-ats] + [-ats-min-barrier-height ATS_MIN_BARRIER_HEIGHT] [-ats-max-bond-dist ATS_MAX_BOND_DIST] + [-ats-max-core-reduction ATS_MAX_CORE_REDUCTION] + [structure_or_graph] + +Popular examples: + +- Generate optimized-topology graph: + + $SCHRODINGER/run -FROM scisol fep_mapper.py foo.mae -o foo + +- Generate optimized-topology graph with custom core: + + $SCHRODINGER/run -FROM scisol fep_mapper.py foo.mae -o foo -m "CC(=O)NCC(=O)NC" + +- Generate star-topology graph with custom core: + + $SCHRODINGER/run -FROM scisol fep_mapper.py foo.mae -o foo -m "CC(=O)NCC(=O)NC" -t star + +positional arguments: + structure_or_graph Structure file in Pose Viewer format (_pv.mae) or graph (.fmp) file. If .fmp file is given, the graph + optimization is only performed with -force-optimize option. + +optional arguments: + -h, --help show this help message and exit + -v, -version show program's version number and exit + -full-help List all available options. + -o BASENAME, -output BASENAME + output files' base name. Files to be written: .edge, .fmp. + -s BASENAME, -siminp BASENAME + simulation input files' base name. When this option is specified, a number of input files for FEP simulations + will be written out. + -x FILENAME, -extend FILENAME + extend the graph as saved in file FILENAME. + -r RECEPTOR, --receptor RECEPTOR + -receptor is DEPRECATED: Please specify -environment + -e N, -environment N specify the initial N structures as the common environment structures. This option is needed when you want to + write out structure input files for relative binding free energy calculations. + -rha RECEPTOR_HOTATOMS_ASL, -receptor-hotatoms-asl RECEPTOR_HOTATOMS_ASL + ASL expression to specify receptor hot atoms. 
Ligand hot atoms are reset to default unless '-ligand-hotatoms-rule-complex' and '-ligand-hotatoms-rule-solvent' are specified
+ -ligand-hotatoms-rule-complex LIGAND_HOTATOMS_RULE_COMPLEX
+ REST rule for ligand in complex leg. Must be used with -receptor-hotatoms-asl option.
+ -ligand-hotatoms-rule-solvent LIGAND_HOTATOMS_RULE_SOLVENT
+ REST rule for ligand in solvent leg. Must be used with -receptor-hotatoms-asl option.
+ -t TOPOLOGY, -topology TOPOLOGY
+ Graph topology type, available options: ['full', 'normal', 'star', 'windmill']. Default is 'normal'
+ -b BIAS, -bias BIAS A 'bias' value will result in a graph with biased nodes (hubs), which correspond to structures with nonzero
+ values of the CT-level property 'i_fepmapper_bias'.
+ -add-bias ADD_BIAS Tag the given ligand as bias.
+ -C CUTOFF, -cutoff CUTOFF
+ Specifies the maximum number of unmapped atoms between the two structures in the edge. If the number of
+ unmapped atoms is greater than CUTOFF, the similarity score of the edge is zero. Note that higher
+ similarity-score cutoffs correspond to smaller values of this option. Default: No cutoff. Also see -simi-cutoff
+ -simi-cutoff SIMI_CUTOFF
+ Specifies the minimum similarity score. Edges with lower similarity scores will be deleted. Default: 0 (no
+ cutoff)
+ -num-procs NUM_PROCS Number of processes used for graph optimization. Default is to use 1 CPU core
+ -align-core-only do not adjust the non-core atoms when aligning the core atoms.
+ -rule RULE specify custom rules for similarity-score calculation. Default value is:
+ 'Mcs,Charge,SoftBond,MinimumNumberOfAtom,SnapCoreRmsd,BidirectionSnapCore'.
+ -simiscore print out a detailed account of similarity scores for all pairs.
+ -force-optimize If a graph file is given, it will be re-optimized if this option is provided
+ -connect-disconnected-nodes
+ Only optimize non-edge nodes. This option only works if an fmp file is given
+ -generate-neutral-intermediates
+ Add neutral ligand between core hopping and charged ligand. This option is currently only available for star
+ graphs
+ -clear-predicted-ddg, -ignore-ddg
+ Clear existing predicted ddG values.
+ -m ATOM_MAPPING, -atom-mapping ATOM_MAPPING
+ Specify custom core with SMARTS pattern(s). Multiple SMARTS patterns should be separated with space, e.g.,
+ `-m "CCN CCO"`. Note: Each molecule should ideally match only one SMARTS. If a molecule matches more than 1
+ SMARTS, the longest match will be chosen, and this match should be unique, otherwise matching fails due to
+ ambiguity. This option can be used with -extend-mapping.
+ -strict-matching If set, unsuccessful core smarts matching on a molecule will eliminate all edges from this molecule. If not
+ set, all atoms will be used for matching in case of unsuccessful core smarts matching.
+ -extend-mapping If given, the match will be allowed as an extension of the SMARTS pattern; if false, the match should be just
+ the SMARTS pattern (a warning is issued when a unique match cannot be found).
+ -mapping-scheme {position,position_and_bonding,protein_by_residue}
+ Mechanism for mapping atoms between structures: position: simple position-based mapping;
+ position_and_bonding: approximate position and bonding based mapping; protein_by_residue: residue mapping for
+ selectivity FEP. Default = position_and_bonding
+ -debug
+ -mp METALLOPROTEIN, -metalloprotein METALLOPROTEIN
+ write out siminp files for metalloprotein workflow.
+ -ats perform automated torsional scaling
+ -ats-min-barrier-height ATS_MIN_BARRIER_HEIGHT
+ min barrier height for flagging torsions to scale (default: 8.0)
+ -ats-max-bond-dist ATS_MAX_BOND_DIST
+ max bond dist from mutations for flagging torsions to scale (default: 3)
+ -ats-max-core-reduction ATS_MAX_CORE_REDUCTION
+ max permitted number of atoms removed from core due to ats (default: 10)
diff --git a/external_documentation/fep_plus.txt b/external_documentation/fep_plus.txt
new file mode 100644
index 0000000..c5db7d1
--- /dev/null
+++ b/external_documentation/fep_plus.txt
@@ -0,0 +1,109 @@
+# Version 2020-4
+
+usage:
+* Run a new job:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME
+
+* Run a new job with custom workflow:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME -m
+
+* Restart a previously interrupted job:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME -RESTART -checkpoint
+
+* Extend production simulations for certain edges:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME -extend -checkpoint
+ An example for the format of an edge-file:
+ 36da5ad:397128e
+ 33dd5ad:347118e
+ 33fe5ad:3171f8e
+ Each line specifies an edge with the two nodes' IDs. Each node ID is a hex
+ number of at least 7 digits. The two IDs are separated by a ':' (or '-' or
+ '_').
+
+* Prepare input files for multisim. Do NOT run job:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME -prepare
+
+* Run a protein residue mutation job:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME -protein -solvent_asl
+ follows the same format as used by $SCHRODINGER/run residue_scanning_backend.py -muts_file
+
+* Run a protein stability job:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME -protein
+
+* Add mutations to a complete protein fep job:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME -protein -expand_protein
+
+* Run a metalloprotein FEP job:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME -mp
+
+positional arguments:
+ inp_file An fmp or a pv structure file
+
+optional arguments:
+ -h, --help show this help message and exit
+ -m Use custom workflow instead of the auto-generated one.
+ -ff {OPLS_2005|OPLS3e}
+ Specify the forcefield to use. Default: OPLS3e.
+ -seed Specify seed of pseudorandom number generator for initial atom velocities. Default: 2014
+ -ppj PPJ Specify number of processors per job. Default: 4.
+ -checkpoint
+ Specify the multisim checkpoint file.
+ -prepare Do not run job. Only prepare multisim input files.
+ -JOBNAME Specify the job name.
+ -buffer Specify a larger buffer size (in Angstroms). Defaults: 5 in complex leg; 5 in solvent leg of protein-residue-mutation FEP;
+ 10 in solubility FEP; 10 in solvent leg of other types of FEP. The custom value will be used only if it's greater than the
+ corresponding default values.
+ -maxjob Maximum number of simultaneous subjobs. Default: 0 (unlimited)
+ -lambda-windows , -lambda_windows
+ Number of lambda windows for the default protocol. Default: 12
+ -ensemble {muVT|NPT|NVT}
+ Specify the ensemble class. Default: muVT.
+ -time Specify the production-simulation time (in ps). For extension, this option specifies the additional simulation time (in
+ ps). Default: 5000.0. Min value: 500.0.
+ -protein Generate and run protein residue mutation if a mutation_file is given here and a solvent_asl is also provided; Generate and
+ run protein stability when a mutation_file is given here and no solvent_asl is provided
+ -mp []
+ Generate and run metalloprotein workflow.
+ -solvent-asl SOLVENT_ASL, -solvent_asl SOLVENT_ASL
+ Specify ASL to put in solvent leg for protein residue mutation
+ -vacuum Include vacuum simulations. Only supported for small molecule FEP.
+ -extend Extend production simulations of specified edges.
+ -atom-mapping , -atom_mapping
+ Atom mapping specification for leadoptmap.py. For small molecule FEP, specify SMARTS string to customize core assignment;
+ for protein residue mutation FEP, 'sidechain' is the only argument allowing the side chain atoms to be mapped as well while
+ by default the side chains are not mapped. This option will be ignored if fmp file is provided as input.
+ -modify-dihe, -modify_dihe
+ Modify retained dihedral angle interactions for customized core.
+ -no-h-mass, -no_h_mass
+ Turn off hydrogen mass repartitioning (on by default).
+ -membrane Indicates the model system is a membrane protein system, such as the GPCR.
+ -charged-lambda-windows , -charged_lambda_windows
+ Number of lambda windows for the charge protocol. Default: 24
+ -core-hopping-lambda-windows , -core_hopping_lambda_windows
+ Number of lambda windows for the core-hopping protocol. Default: 16
+ -residue-structure , -residue_structure
+ Noncanonical amino acids for protein mutation.
+ -expand-protein EXPAND_PROTEIN, -expand_protein EXPAND_PROTEIN
+ Pass the structure file for protein fep to re-run with additional mutations.
+ -water Specify the water model used in the simulations. Valid values: SPC, TIP3P, TIP4P, TIP4PEW, TIP4PD, TIP5P. Default: SPC
+ -custom-charge-mode , -custom_charge_mode
+ Set the custom charge calculation mode when using the OPLS3e forcefield. Default is to 'assign' custom charges based on the
+ input geometries. Set to 'clear' to clear custom charges without assigning them. Set to 'keep' to keep existing custom charge
+ parameters.
+ -skip-leg []
+ Specify the legs to skip (complex/solvent/vacuum). Pass in multiple times to skip multiple legs
+
+Job Control Options:
+ -HOST Run job remotely on the indicated host entry.
+ -WAIT Do not return a prompt until the job completes.
+ -LOCAL Do not use a temporary directory for job files. Keep files in the current directory.
+ -D, -DEBUG Show details of Job Control operation.
+ -TMPDIR TMPDIR The name of the directory used to store files temporarily during a job.
+ -SAVE Return zip archive of job directory at job completion.
+ -OPLSDIR OPLSDIR Specifies directory for custom forcefield parameters.
+
+Standard Options:
+ -SUBHOST or -SUBHOST or -SUBHOST "hostname1:nproc1 ... hostnameN:nprocN"
+ Run the subjobs on the specified hosts. The driver is run on the host specified with -HOST.
+ -RETRIES RETRIES If a subjob fails for any reason, it will be retried RETRIES times.
+ -RESTART Restart a previously failed job, utilizing any already completed subjobs.
diff --git a/external_documentation/prime_arguments.txt b/external_documentation/prime_arguments.txt
new file mode 100644
index 0000000..093d046
--- /dev/null
+++ b/external_documentation/prime_arguments.txt
@@ -0,0 +1,180 @@
+$SCHRODINGER/prime_mmgbsa []
+
+ Any option can be placed in an input file for ease of use. The input
+ structure should be specified on a line using "STRUCT_FILE ".
+ All other options should be placed one-per-line with the preceding dash
+ removed.
For example, the following two calculations are equivalent:
+
+ > prime_mmgbsa file_pv.mae -job_type ENERGY -lcons SMARTS.C
+
+ or
+
+ > prime_mmgbsa input.inp
+
+ where the contents of input.inp are:
+
+ STRUCT_FILE file_pv.mae
+ JOB_TYPE ENERGY
+ LCONS SMARTS.C
+
+ Note that HOST, SUBHOST and NICE flags must be set via the commandline.
+
+ All atoms in the ligand will have the i_psp_Prime_MMGBSA_Ligand property
+ set to 1, so that asl expressions can then be used for specifying parts of
+ the structure relative to the ligand. For example,
+ "fillres within 5 (atom.i_psp_Prime_MMGBSA_Ligand 1)"
+ selects all residues within 5 Angstroms of the ligand.
+
+ SMARTS expressions can be included in all asl expressions. The command
+ -lcons SMARTS.C will apply constraints to all aliphatic carbons in the ligand.
+
+ run $SCHRODINGER/prime_mmgbsa -h for a complete listing of all options.
+
+
+positional arguments:
+ struct_file For most situations, this should be a Maestro file
+ with the receptor as the first entry, followed by the
+ ligand poses (e.g. a Glide pose viewer file). If the
+ -ligand option is specified, then the input should
+ instead be a Maestro file with each entry containing a
+ protein-ligand complex.
+
+optional arguments:
+ -h, --help show this help message and exit
+ -report_prime_log REPORT_PRIME_LOG
+ (yes/no) Return an output file with the extension
+ .Prime.log with the Prime logfiles of all component
+ jobs. By default this is included if fewer than 100
+ ligands are used
+ -csv_output CSV_OUTPUT
+ (yes/no) Return a csv format output file -out.csv with
+ the calculated energies.
+ -report_top REPORT_TOP
+ Report the specified number of top-scoring ligands in
+ the log file. All ligands are reported in the CSV and
+ structure output files.
+ -v show program's version number and exit
+ -jobname JOBNAME, -JOBNAME JOBNAME, -j JOBNAME
+ Set the base name of outputs
+ -restart_file RESTART_FILES
+ Output of a partially completed subjob (this keyword
+ can be used multiple times if there are
+ multiple completed subjobs)
+ -RESTART Guess names of restart files for -restart_file option
+
+Input:
+ -ligand LIGAND_ASL Specify the ligand with an asl expression (required
+ for trajectory processing). If this option is present
+ then the input should be a maestro input file with
+ each entry containing both the ligand and the
+ receptor. The asl expression provided here will be
+ used to determine which part of the complex structure
+ is the ligand.
+
+Output:
+ -out_type {PV,COMPLEX,LIGAND,FLEXIBLE,COMPLETE}
+ The type of Maestro file to output. Choices are PV,
+ COMPLEX, LIGAND, FLEXIBLE, and COMPLETE. LIGAND
+ produces a ligand-only file. PV will produce a
+ combination of the input receptor and the optimized
+ ligand positions. COMPLEX will return the optimized
+ ligand and receptor conformations. FLEXIBLE outputs
+ the optimized conformations of the flexible portions.
+ Please note that this is not a full protein structure
+ and cannot be used for any subsequent calculations. Of
+ these, only COMPLEX will return the full optimized
+ receptor conformation. PV files are default if
+ inputting a PV file, COMPLEX files are the default if
+ inputting a series of complexes. COMPLETE adds the
+ optimized free receptor and ligand structures to the
+ complex output file
+
+Molecular Mechanics (PRIME):
+ -receptor_region RSEL_ASL, -rsel RSEL_ASL
+ Designate a region of the receptor as flexible using
+ an asl expression.
Expressions can refer to atoms in
+ the ligand or the receptor and the selected region is
+ the union of all the regions for each ligand-receptor
+ pair in the input PV file. By default the entire
+ receptor is frozen.
+ -rflexdist FLEXDIST, -flexdist FLEXDIST
+ Treat all residues within this distance of the ligand
+ as flexible. By default the entire receptor is frozen.
+ (overwrites -receptor_region flag)
+ -rflexgroup {residue,side,polarh}
+ Select a portion of the region defined with rflexdist
+ flag to be flexible. residue: Choose the entire
+ residue. side: Choose the sidechain of each residue.
+ polarh: Choose the polar hydrogens on each residue.
+ -target_flexibility Run a two-stage MMGBSA calculation where the second
+ stage runs with the subset of flexible protein
+ residues identified by the first
+ -target_flexibility_cutoff TARGET_FLEXIBILITY_CUTOFF
+ Cutoff for determining movement for target flexibility
+ in Angstroms
+ -ligand_region LSEL_ASL, -lsel LSEL_ASL
+ Choose a section of the ligand to be treated as
+ flexible. By default the entire ligand is flexible.
+ -job_type {ENERGY,REAL_MIN,SIDE_PRED,SIDE_COMBI,SITE_OPT,PGL}
+ Prime jobtype to use to sample flexible regions.
+ Setting this option multiple times will result in
+ multiple sampling algorithms being used. Options are:
+ REAL_MIN (default): Local minimization. ENERGY: No
+ sampling, just calculate a single-point energy.
+ SIDE_PRED: Optimize sidechains using Prime sidechain
+ prediction. SIDE_COMBI: Optimize sidechain using
+ Combinatorial Sidechain Prediction. (Limited to <5
+ sidechains). SITE_OPT: Run a binding-site optimization
+ consisting of prime sidechain predictions and
+ minimizations designed specifically for predicting
+ induced fit effects. PGL: Run a Prime PGL Binding-Site
+ Optimization on all flexible regions. See the manual
+ for more details on this protocol.
+ -rigid_body Minimize the ligand as a rigid body
+ -num_output_struct NUM_OUTPUT_STRUCT
+ The maximum number of poses to return per compound.
+ This will only be relevant if multiple job types are
+ selected or job types that return multiple outputs are
+ used.
+ -lcons LCONS Select a portion of the ligand to harmonically
+ constrain with an ASL expression. By default no
+ constraints are used.
+ -rcons RCONS Select a portion of the receptor to harmonically
+ constrain with an ASL expression. By default no
+ constraints are used.
+ -str_cons STR_CONS Strength of Receptor and Ligand Constraints in
+ kcals/mol/A^2. The default value is 1.0 kcal/mol/A^2
+ -fbw_cons FBW_CONS Width of flat bottom potential for constraints in A.
+ The default value is 0.0 A
+ -prime_opt PRIME_OPTIONS
+ Pass any keyword value pair to the Prime Refinement
+ stage in the form keyword=value. See the "Refining
+ Protein Structures" chapter in the Prime User Manual
+ for a description of available options. If you would
+ like to change the force field from its default value
+ (OPLS3e if the proper license is present) use
+ -prime_opt OPLS_VERSION=OPLS_2005
+ -use_ligand_charges Use the partial charges in the input ligand file.
+ -frozen Do not treat any part of the ligand or receptor as
+ flexible. This overwrites the -ligand_region and
+ -receptor_region flags.
+ -membrane Use Prime implicit membrane model (must be set up in
+ receptor file through Maestro)
+
+Watermap:
+ -watermap WATERMAP_FN, -WATERMAP WATERMAP_FN
+ Score ligands against this watermap. Input should be a
+ ct file containing the watermap generated with the
+ current version of the Schrodinger suite.
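+
+ A minimal combined invocation (a sketch for orientation only; the pose file
+ and job name below are placeholders, and every option used is documented
+ above):
+
+ > prime_mmgbsa poses_pv.mae -job_type SIDE_PRED -flexdist 5.0 -jobname mmgbsa_flex -csv_output yes
+
+ This optimizes the sidechains of all residues within 5 Angstroms of each
+ ligand pose and writes the calculated energies to a CSV file alongside the
+ structure output.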
+ +Job Control Options: + -HOST Run job remotely on the indicated host entry. + -WAIT Do not return a prompt until the job completes. + -D, -DEBUG Show details of Job Control operation. + -NOJOBID Run the job directly, without Job Control layer. + +Standard Options: + -NJOBS NJOBS Divide the overall job into NJOBS subjobs. + -RETRIES RETRIES If a subjob fails for any reason, it will be retried + RETRIES times. (Default: 3) + -NOLAUNCH Set up subjob inputs, but don't run the jobs. diff --git a/icolos/__init__.py b/icolos/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/config/amber/default_mmpbsa.in b/icolos/config/amber/default_mmpbsa.in new file mode 100644 index 0000000..843bebf --- /dev/null +++ b/icolos/config/amber/default_mmpbsa.in @@ -0,0 +1,6 @@ +&general +startframe=0, endframe=5000000000, verbose=2, +/ +&gb +igb=5, saltcon=0.150, +/ diff --git a/icolos/config/cosmo/default_cosmo.config b/icolos/config/cosmo/default_cosmo.config new file mode 100644 index 0000000..c7dcf71 --- /dev/null +++ b/icolos/config/cosmo/default_cosmo.config @@ -0,0 +1,28 @@ +ctd = BP_TZVPD_FINE_20.ctd cdir = "/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/CTDATA-FILES" ldir = "/opt/scp/software/COSMOtherm/20.0.0/licensefiles" +unit notempty wtln ehfile +!! generated by COSMOthermX !! +f = mol.cosmo +f = "h2o_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/h" VPfile +f = "methanol_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/m" VPfile +f = "1-octanol_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" Comp = "1-octanol" [ VPfile +f = "1-octanol_c1.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c2.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c3.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c4.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c5.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c6.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" ] +f = "dimethylsulfoxide_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/d" VPfile +f = "cyclohexane_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/c" VPfile +f = "chcl3_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/c" VPfile +f = "propanone_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/p" VPfile +f = "acetonitrile_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/a" VPfile +f = "thf_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/t" VPfile +henry= 2 tc=25.0 GSOLV +henry= 3 tc=25.0 GSOLV +henry= 4 tc=25.0 GSOLV +henry= 5 tc=25.0 GSOLV +henry= 6 tc=25.0 GSOLV +henry= 7 tc=25.0 GSOLV +henry= 8 tc=25.0 GSOLV +henry= 9 tc=25.0 GSOLV +henry= 10 tc=25.0 GSOLV \ No newline at end of file diff --git a/icolos/config/desmond/config.msj b/icolos/config/desmond/config.msj new file mode 100644 index 0000000..babcf80 --- /dev/null +++ b/icolos/config/desmond/config.msj @@ -0,0 +1,23 @@ +task { + task = "desmond:auto" +} + 
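+# Build the solvated system: an orthorhombic box with a 10 A buffer around the
+# solute, TIP3P water, Cl counterions added until the net charge is zero, and
+# the S-OPLS force field (assigned in the stages below).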
+build_geometry { + add_counterion = { + ion = Cl + number = neutralize_system + } + box = { + shape = orthorhombic + size = [10.0 10.0 10.0 ] + size_type = buffer + } + override_forcefield = S-OPLS + rezero_system = false + solvent = TIP3P +} + +assign_forcefield { + forcefield = S-OPLS + water = TIP3P +} \ No newline at end of file diff --git a/icolos/config/desmond/prod.cfg b/icolos/config/desmond/prod.cfg new file mode 100644 index 0000000..32c426f --- /dev/null +++ b/icolos/config/desmond/prod.cfg @@ -0,0 +1,69 @@ +annealing = false +backend = { +} +bigger_rclone = false +checkpt = { + first = 0.0 + interval = 240.06 + name = "$JOBNAME.cpt" + write_last_step = true +} +cpu = 1 +cutoff_radius = 9.0 +elapsed_time = 0.0 +energy_group = false +eneseq = { + first = 0.0 + interval = 1.2 + name = "$JOBNAME$[_replica$REPLICA$].ene" +} +ensemble = { + barostat = { + tau = 2.0 + } + class = NPT + method = MTK + thermostat = { + tau = 1.0 + } +} +glue = solute +maeff_output = { + first = 0.0 + interval = 120.0 + name = "$JOBNAME$[_replica$REPLICA$]-out.cms" + periodicfix = true + trjdir = "$JOBNAME$[_replica$REPLICA$]_trj" +} +meta = false +meta_file = ? +pressure = [1.01325 isotropic ] +randomize_velocity = { + first = 0.0 + interval = inf + seed = 2007 + temperature = "@*.temperature" +} +restrain = none +simbox = { + first = 0.0 + interval = 1.2 + name = "$JOBNAME$[_replica$REPLICA$]_simbox.dat" +} +surface_tension = 0.0 +taper = false +temperature = [ + [300.0 0 ] +] +time = 10.0 +timestep = [0.002 0.002 0.006 ] +trajectory = { + center = [] + first = 0.0 + format = dtr + frames_per_file = 250 + interval = 2.0 + name = "$JOBNAME$[_replica$REPLICA$]_trj" + periodicfix = true + write_velocity = false +} \ No newline at end of file diff --git a/icolos/config/desmond/production.msj b/icolos/config/desmond/production.msj new file mode 100644 index 0000000..358ca55 --- /dev/null +++ b/icolos/config/desmond/production.msj @@ -0,0 +1,102 @@ +task { + task = "desmond:auto" + set_family = { + desmond = { + checkpt.write_last_step = no + } + } +} + +simulate { + title = "Brownian Dynamics NVT, T = 10 K, small timesteps, and restraints on solute heavy atoms, 100ps" + annealing = off + time = 100 + timestep = [0.001 0.001 0.003 ] + temperature = 10.0 + ensemble = { + class = "NVT" + method = "Brownie" + brownie = { + delta_max = 0.1 + } + } + restrain = { + atom = "solute_heavy_atom" + force_constant = 50.0 + } +} + +simulate { + title = "NVT, T = 10 K, small timesteps, and restraints on solute heavy atoms, 12ps" + annealing = off + time = 12 + timestep = [0.001 0.001 0.003] + temperature = 10.0 + restrain = { atom = solute_heavy_atom force_constant = 50.0 } + ensemble = { + class = NVT + method = Langevin + thermostat.tau = 0.1 + } + + randomize_velocity.interval = 1.0 + eneseq.interval = 0.3 + trajectory.center = [] +} + +simulate { + title = "NPT, T = 10 K, and restraints on solute heavy atoms, 12ps" + annealing = off + time = 12 + temperature = 10.0 + restrain = retain + ensemble = { + class = NPT + method = Langevin + thermostat.tau = 0.1 + barostat .tau = 50.0 + } + + randomize_velocity.interval = 1.0 + eneseq.interval = 0.3 + trajectory.center = [] +} + +simulate { + title = "NPT and restraints on solute heavy atoms, 12ps" + effect_if = [["@*.*.annealing"] 'annealing = off temperature = "@*.*.temperature[0][0]"'] + time = 12 + restrain = retain + ensemble = { + class = NPT + method = Langevin + thermostat.tau = 0.1 + barostat .tau = 50.0 + } + + randomize_velocity.interval = 1.0 + eneseq.interval 
= 0.3 + trajectory.center = [] +} + +simulate { + title = "NPT and no restraints, 24ps" + effect_if = [["@*.*.annealing"] 'annealing = off temperature = "@*.*.temperature[0][0]"'] + time = 24 + ensemble = { + class = NPT + method = Langevin + thermostat.tau = 0.1 + barostat .tau = 2.0 + } + + eneseq.interval = 0.3 + trajectory.center = solute +} + +simulate { + cfg_file = "prod.cfg" + jobname = "$MASTERJOBNAME" + dir = "." + compress = "" +} \ No newline at end of file diff --git a/icolos/config/logging/debug.json b/icolos/config/logging/debug.json new file mode 100644 index 0000000..641ed79 --- /dev/null +++ b/icolos/config/logging/debug.json @@ -0,0 +1,75 @@ +{ + "version": 1, + "disable_existing_loggers": false, + "formatters": { + "standard": { + "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s", + "datefmt": "%Y-%m-%d %H:%M:%S" + }, + "blank": { + "format": "%(message)s" + } + }, + + "handlers": { + "console": { + "class": "logging.StreamHandler", + "level": "DEBUG", + "formatter": "standard", + "stream": "ext://sys.stderr" + }, + + "file_handler": { + "class": "logging.handlers.RotatingFileHandler", + "level": "DEBUG", + "formatter": "standard", + "filename": "icolos_run.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + }, + + "file_handler_blank": { + "class": "logging.handlers.RotatingFileHandler", + "level": "DEBUG", + "formatter": "blank", + "filename": "icolos_run.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + } + }, + + "loggers": { + "command_line_interface": { + "level": "DEBUG", + "handlers": ["file_handler"], + "propagate": false + }, + "target_preparation": { + "level": "DEBUG", + "handlers": ["file_handler"], + "propagate": false + }, + "ligand_preparation": { + "level": "DEBUG", + "handlers": ["file_handler"], + "propagate": false + }, + "docking": { + "level": "DEBUG", + "handlers": ["file_handler"], + "propagate": false + }, + "blank": { + "level": "DEBUG", + "handlers": ["file_handler_blank"], + "propagate": false + } + }, + + "root": { + "level": "DEBUG", + "handlers": ["file_handler"] + } +} diff --git a/icolos/config/logging/default.json b/icolos/config/logging/default.json new file mode 100644 index 0000000..7627e0a --- /dev/null +++ b/icolos/config/logging/default.json @@ -0,0 +1,75 @@ +{ + "version": 1, + "disable_existing_loggers": false, + "formatters": { + "standard": { + "format": "%(asctime)s - %(message)s", + "datefmt": "%Y-%m-%d %H:%M:%S" + }, + "blank": { + "format": "%(message)s" + } + }, + + "handlers": { + "console": { + "class": "logging.StreamHandler", + "level": "INFO", + "formatter": "standard", + "stream": "ext://sys.stderr" + }, + + "file_handler": { + "class": "logging.handlers.RotatingFileHandler", + "level": "INFO", + "formatter": "standard", + "filename": "icolos_run.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + }, + + "file_handler_blank": { + "class": "logging.handlers.RotatingFileHandler", + "level": "INFO", + "formatter": "blank", + "filename": "icolos_run.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + } + }, + + "loggers": { + "command_line_interface": { + "level": "INFO", + "handlers": ["file_handler"], + "propagate": false + }, + "target_preparation": { + "level": "INFO", + "handlers": ["file_handler"], + "propagate": false + }, + "ligand_preparation": { + "level": "INFO", + "handlers": ["file_handler"], + "propagate": false + }, + "docking": { + "level": "INFO", + "handlers": ["file_handler"], + "propagate": false 
+ }, + "blank": { + "level": "INFO", + "handlers": ["file_handler_blank"], + "propagate": false + } + }, + + "root": { + "level": "INFO", + "handlers": ["file_handler"] + } +} diff --git a/icolos/config/logging/tutorial.json b/icolos/config/logging/tutorial.json new file mode 100644 index 0000000..1b7ceb5 --- /dev/null +++ b/icolos/config/logging/tutorial.json @@ -0,0 +1,75 @@ +{ + "version": 1, + "disable_existing_loggers": false, + "formatters": { + "standard": { + "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s", + "datefmt": "%Y-%m-%d %H:%M:%S" + }, + "blank": { + "format": "%(message)s" + } + }, + + "handlers": { + "console": { + "class": "logging.StreamHandler", + "level": "DEBUG", + "formatter": "standard", + "stream": "ext://sys.stdout" + }, + + "file_handler": { + "class": "logging.handlers.RotatingFileHandler", + "level": "DEBUG", + "formatter": "standard", + "filename": "icolos_run.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + }, + + "file_handler_blank": { + "class": "logging.handlers.RotatingFileHandler", + "level": "DEBUG", + "formatter": "blank", + "filename": "icolos_run.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + } + }, + + "loggers": { + "command_line_interface": { + "level": "DEBUG", + "handlers": ["file_handler"], + "propagate": false + }, + "target_preparation": { + "level": "DEBUG", + "handlers": ["file_handler"], + "propagate": false + }, + "ligand_preparation": { + "level": "DEBUG", + "handlers": ["file_handler"], + "propagate": false + }, + "docking": { + "level": "DEBUG", + "handlers": ["file_handler"], + "propagate": false + }, + "blank": { + "level": "DEBUG", + "handlers": ["file_handler_blank"], + "propagate": false + } + }, + + "root": { + "level": "DEBUG", + "handlers": ["file_handler"] + } +} diff --git a/icolos/config/logging/verbose.json b/icolos/config/logging/verbose.json new file mode 100644 index 0000000..7627e0a --- /dev/null +++ b/icolos/config/logging/verbose.json @@ -0,0 +1,75 @@ +{ + "version": 1, + "disable_existing_loggers": false, + "formatters": { + "standard": { + "format": "%(asctime)s - %(message)s", + "datefmt": "%Y-%m-%d %H:%M:%S" + }, + "blank": { + "format": "%(message)s" + } + }, + + "handlers": { + "console": { + "class": "logging.StreamHandler", + "level": "INFO", + "formatter": "standard", + "stream": "ext://sys.stderr" + }, + + "file_handler": { + "class": "logging.handlers.RotatingFileHandler", + "level": "INFO", + "formatter": "standard", + "filename": "icolos_run.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + }, + + "file_handler_blank": { + "class": "logging.handlers.RotatingFileHandler", + "level": "INFO", + "formatter": "blank", + "filename": "icolos_run.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + } + }, + + "loggers": { + "command_line_interface": { + "level": "INFO", + "handlers": ["file_handler"], + "propagate": false + }, + "target_preparation": { + "level": "INFO", + "handlers": ["file_handler"], + "propagate": false + }, + "ligand_preparation": { + "level": "INFO", + "handlers": ["file_handler"], + "propagate": false + }, + "docking": { + "level": "INFO", + "handlers": ["file_handler"], + "propagate": false + }, + "blank": { + "level": "INFO", + "handlers": ["file_handler_blank"], + "propagate": false + } + }, + + "root": { + "level": "INFO", + "handlers": ["file_handler"] + } +} diff --git a/icolos/config/panther/default_panther.in b/icolos/config/panther/default_panther.in new file mode 
100644 index 0000000..28a35c1 --- /dev/null +++ b/icolos/config/panther/default_panther.in @@ -0,0 +1,74 @@ +# Please cite: Niinivehmas et al. (2015) J. Compt. Aided. Mol. Design. 29(10), 989-1006. + +# Input and output settings: +1-Pdb file (pfil):: {pdb_file} +2-Radius library (rlib):: rad.lib +3-Angle library (alib):: angles.lib +4-Charge library file (chlib):: charges.lib + +# Cavity location: +5-Center (s) (cent):: 24.42 21.79 18.26 +6-Radius center algorithm (radc):: 0.00 +7-Basic multipoint (bmp):: null + +# Pocket filling: +8-Filler radius (frad):: 0.85 + +9-Box radius (brad):: 24.0 +10-Box center (bcen):: null +11-Multibox (mbox):: Y + +#_# Identification settings: #_# +12-Not empty (nem):: HOH FAD NAP WAT NDP NAI NAD FDA +13-Force lining (flin):: null +14-Ignore lining (ilin):: null + +#_# Pocket tweaks: #_# +15-Add oxygen at HEM Fe / dual mode (OFed):: Y Y +16-Charge radius (chrad):: 0.00 +17-Lowest significant charge (+/-) (lowch):: 0.16 +18-Use waters as polar groups (watpol):: 2 +# - Exclusion settings: +19-Delete farther than (del):: 4.5 +20-ligand distance limit (ldlim):: X-0 2.0 +21-False connection angles (fcang):: 180 90 +22-False connection group size (fcgrp):: 200 +23-Exclusion zone (ezon):: null +24-Angle exclusion (aex):: null +25-plane-exclusion (pex):: null +26-Force plane exclusion center (fpec):: null + +# - Inclusion settings: +27-Global keep anyway radius (gkar):: 0.00 +28-Keep anyway radius (kar):: 7.00 +29-AA limit (aalim):: 0 +30-Specific limits (slim):: null + +# - Other settings: +31-Secondary (sec):: N +32-Cofactor fill (cofil):: N + +#_# Rather advanced pocket tweaks. #_# +33-Packing method (pack):: bcc +34-Creep radius (creep):: null +35-Full lining (fulli):: Y +36-Adjacent lining (adjli):: N +37-multibounds (mbo):: Y +38-Max distance of charged atoms (chdist):: 1.6 +39-Agonist-distance (agdist):: 2.5 + +#_# Various constants. #_# +40-Angle tolerance (atol):: 30 +41-Resolution Tolerance (retol):: 0.2 +42-Adjacent distance (adjdist):: 3 +43-Boundary increment (boinc):: 1.3 +44-lining id angle (lidang):: 35 + +# Misc & experimental +45-Radius for charged atoms (chatrad):: 0.5 +46-Exclusion distance for charged atoms and their residues (radexdres):: 0.6 +47-H-bond distance (hbdist):: 1.7 +48-donor addition (donads):: 1.0 +49-h-bond max distance (hobomax):: 4.2 + + diff --git a/icolos/config/unit_tests_config/cosmo/cosmo.config b/icolos/config/unit_tests_config/cosmo/cosmo.config new file mode 100644 index 0000000..7dd7131 --- /dev/null +++ b/icolos/config/unit_tests_config/cosmo/cosmo.config @@ -0,0 +1,28 @@ +ctd = BP_TZVPD_FINE_19.ctd cdir = "/opt/scp/software/COSMOtherm/19.0.4/COSMOthermX/../COSMOtherm/CTDATA-FILES" ldir = "/opt/scp/software/COSMOtherm/19.0.4/COSMOthermX/../licensefiles" +unit notempty wtln ehfile +!! generated by COSMOthermX !! 
+f = mol.cosmo +f = "h2o_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/h" VPfile +f = "methanol_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/m" VPfile +f = "1-octanol_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" Comp = "1-octanol" [ VPfile +f = "1-octanol_c1.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c2.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c3.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c4.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c5.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c6.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" ] +f = "dimethylsulfoxide_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/d" VPfile +f = "cyclohexane_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/c" VPfile +f = "chcl3_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/c" VPfile +f = "propanone_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/p" VPfile +f = "acetonitrile_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/a" VPfile +f = "thf_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/t" VPfile +henry= 2 tc=25.0 GSOLV +henry= 3 tc=25.0 GSOLV +henry= 4 tc=25.0 GSOLV +henry= 5 tc=25.0 GSOLV +henry= 6 tc=25.0 GSOLV +henry= 7 tc=25.0 GSOLV +henry= 8 tc=25.0 GSOLV +henry= 9 tc=25.0 GSOLV +henry= 10 tc=25.0 GSOLV \ No newline at end of file diff --git a/icolos/config/unit_tests_config/cosmo/example.cosmo.config b/icolos/config/unit_tests_config/cosmo/example.cosmo.config new file mode 100644 index 0000000..7dd7131 --- /dev/null +++ b/icolos/config/unit_tests_config/cosmo/example.cosmo.config @@ -0,0 +1,28 @@ +ctd = BP_TZVPD_FINE_19.ctd cdir = "/opt/scp/software/COSMOtherm/19.0.4/COSMOthermX/../COSMOtherm/CTDATA-FILES" ldir = "/opt/scp/software/COSMOtherm/19.0.4/COSMOthermX/../licensefiles" +unit notempty wtln ehfile +!! generated by COSMOthermX !! 
+f = mol.cosmo +f = "h2o_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/h" VPfile +f = "methanol_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/m" VPfile +f = "1-octanol_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" Comp = "1-octanol" [ VPfile +f = "1-octanol_c1.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c2.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c3.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c4.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c5.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c6.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" ] +f = "dimethylsulfoxide_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/d" VPfile +f = "cyclohexane_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/c" VPfile +f = "chcl3_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/c" VPfile +f = "propanone_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/p" VPfile +f = "acetonitrile_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/a" VPfile +f = "thf_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/t" VPfile +henry= 2 tc=25.0 GSOLV +henry= 3 tc=25.0 GSOLV +henry= 4 tc=25.0 GSOLV +henry= 5 tc=25.0 GSOLV +henry= 6 tc=25.0 GSOLV +henry= 7 tc=25.0 GSOLV +henry= 8 tc=25.0 GSOLV +henry= 9 tc=25.0 GSOLV +henry= 10 tc=25.0 GSOLV \ No newline at end of file diff --git a/icolos/config/unit_tests_config/example.config.json b/icolos/config/unit_tests_config/example.config.json new file mode 100644 index 0000000..cd6bf11 --- /dev/null +++ b/icolos/config/unit_tests_config/example.config.json @@ -0,0 +1,30 @@ +{ + "ICOLOS_TEST_DATA": "../../../IcolosData", + "OE_LICENSE": "/opt/scp/software/oelicense/1.0/oe_license.seq1", + "CREST_BINARY_LOCATION": "/projects/cc/mai/binaries", + "XTBHOME": "/projects/cc/mai/binaries/xtb-6.4.0", + "XTBPATH": "${XTBHOME}/share/xtb", + "TURBOMOLE_CONFIG": "/projects/cc/mai/material/Icolos/turbomole_config", + "ACPYPE": "/projects/cc/mai/binaries/acpype", + "PATH": "${PATH}:${XTBHOME}", + "PKG_CONFIG_PATH": "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig", + "PARNODES": "2", + "PARA_ARCH": "MPI", + "PANTHER_LOCATION": "/projects/cc/mai/binaries/panther", + "SHAEP_LOCATION": "/projects/cc/mai/binaries", + "FORCEFIELD": "/projects/cc/mai/material/Icolos/forcefields/charmm36-feb2021.ff", + "OPTUNA_AZ": { + "ENVIRONMENT_PYTHON": "/miniconda3/envs/Optuna_AZ/bin/python", + "ENTRY_POINT_LOCATION": "/OptunaAZ" + }, + "PMX": { + "PMX": "/envs/pmx/bin/pmx", + "PMX_PYTHON": "/envs/pmx/bin/python", + "CLI_ENTRYPOINT": "/pmx_az/src/pmx/scripts/mai_utils", + "GMXLIB": "/px_az/src/pmx/data/mutff" + }, + "DSSP": "/opt/scp/software/DSSP/4.0.0-GCCcore-8.2.0/bin", + "MINICONDA_BASE": ". 
//etc/profile.d/conda.sh", + "OPENMM_FORCEFIELDS": "/projects/cc/mai/material/Icolos/openmmforcefields", + "MDPLOT": "/projects/cc/mai/binaries/MDplot" +} \ No newline at end of file diff --git a/icolos/config_containers/__init__.py b/icolos/config_containers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/config_containers/container.py b/icolos/config_containers/container.py new file mode 100644 index 0000000..dcc4d44 --- /dev/null +++ b/icolos/config_containers/container.py @@ -0,0 +1,33 @@ +import abc +import json +import os + + +class ConfContainer(object, metaclass=abc.ABCMeta): + @abc.abstractmethod + def __init__(self, conf): + # get instance of configuration enum and load configuration + # parameter "config" can be a string, a path or a dictionary (as long as it holds valid JSON input) + if isinstance(conf, str): + if os.path.isfile(conf): + with open(conf) as file: + conf = file.read().replace("\r", "").replace("\n", "") + conf = json.loads(conf) + self._conf = conf + + def get_as_dict(self): + return self._conf + + def get(self, key, default=None): + return self._conf.get(key, default) + + def __getitem__(self, item): + return self.get_as_dict()[item] + + def get_as_string(self): + return json.dumps(self._conf) + + def validate(self): + raise NotImplementedError( + "This functions needs to be implemented by child classes." + ) diff --git a/icolos/config_containers/workflow_container.py b/icolos/config_containers/workflow_container.py new file mode 100644 index 0000000..8f8e374 --- /dev/null +++ b/icolos/config_containers/workflow_container.py @@ -0,0 +1,13 @@ +from icolos.config_containers.container import ConfContainer + + +class WorkflowContainer(ConfContainer): + def __init__(self, conf, validation=True): + super().__init__(conf=conf) + + # TODO: include validation with JSON Schema + if validation: + self.validate() + + def validate(self): + pass diff --git a/icolos/core/__init__.py b/icolos/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/composite_agents/__init__.py b/icolos/core/composite_agents/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/composite_agents/base_agent.py b/icolos/core/composite_agents/base_agent.py new file mode 100644 index 0000000..c82a56c --- /dev/null +++ b/icolos/core/composite_agents/base_agent.py @@ -0,0 +1,108 @@ +import os +from abc import abstractmethod +from copy import deepcopy +from typing import Dict, List + +from pydantic import BaseModel, PrivateAttr + +from icolos.loggers.agentlogger import AgentLogger + +from icolos.utils.enums.logging_enums import LoggingConfigEnum +from icolos.utils.enums.composite_agents_enums import WorkflowEnum + +_WE = WorkflowEnum() +_LE = LoggingConfigEnum() + + +class AgentEnvironmentParameters(BaseModel): + class WorkflowExportParameters(BaseModel): + key: str + value: str + + export: List[WorkflowExportParameters] = [] + + +class AgentHeaderParametersSettings(BaseModel): + remove_temporary_files: bool = True + single_directory: bool = False + + +class AgentHeaderParameters(BaseModel): + class AgentLoggingParameters(BaseModel): + logfile: str = None + + id: str = None + description: str = None + logging: AgentLoggingParameters = AgentLoggingParameters() + environment: AgentEnvironmentParameters = None + global_variables: Dict = None + global_settings: AgentHeaderParametersSettings = AgentHeaderParametersSettings() + + +class BaseAgent(BaseModel): + + # should also work without parsing the base specification here, 
+
+
+class BaseAgent(BaseModel):
+
+    # this should also work without parsing the base specification here, but then
+    # IDEs will not pick up the attributes defined below
+    header: AgentHeaderParameters = AgentHeaderParameters()
+
+    class Config:
+        underscore_attrs_are_private = True
+
+    _logger = PrivateAttr()
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        self._logger = AgentLogger()
+
+    def _export_env_variables(self):
+        for var in self.header.environment.export:
+            key = str(var.key)
+            value = os.path.expandvars(str(var.value))
+            os.environ[key] = value
+            self._logger.log(f"Exported variable {key} with value {value}.", _LE.DEBUG)
+
+    def initialize(self):
+        self._export_env_variables()
+
+    def _nested_update(self, inp, pattern: str, replacement: str):
+        if isinstance(inp, dict):
+            items = inp.items()
+        elif isinstance(inp, (list, tuple)):
+            items = enumerate(inp)
+        elif isinstance(inp, str):
+            return inp.replace(pattern, replacement)
+        else:
+            return inp
+
+        for key, value in items:
+            inp[key] = self._nested_update(value, pattern, replacement)
+        return inp
+
+    def _update_global_variables(self, conf: dict) -> dict:
+        conf = deepcopy(conf)
+        if self.header.global_variables is not None:
+            for key, value in self.header.global_variables.items():
+                pattern = "{" + key + "}"
+                self._nested_update(inp=conf, pattern=pattern, replacement=value)
+                self._logger.log(
+                    f"Updated global variable {key} with value {value}.", _LE.DEBUG
+                )
+        return conf
+
+    @abstractmethod
+    def execute(self):
+        raise NotImplementedError
+
+    def is_valid(self) -> bool:
+        raise NotImplementedError
+
+    def set_id(self, id: str):
+        self.header.id = id
+
+    def get_id(self) -> str:
+        return self.header.id
+
+    def set_description(self, description: str):
+        self.header.description = description
+
+    def get_description(self) -> str:
+        return self.header.description
diff --git a/icolos/core/composite_agents/scheduler.py b/icolos/core/composite_agents/scheduler.py
new file mode 100644
index 0000000..41b4f57
--- /dev/null
+++ b/icolos/core/composite_agents/scheduler.py
@@ -0,0 +1,54 @@
+from pydantic import BaseModel, PrivateAttr
+
+from icolos.core.composite_agents.base_agent import BaseAgent, AgentHeaderParameters
+
+from icolos.utils.enums.logging_enums import LoggingConfigEnum
+from icolos.utils.enums.composite_agents_enums import SchedulerEnum
+
+_SE = SchedulerEnum()
+_LE = LoggingConfigEnum()
+
+
+class SchedulerHeaderParameters(AgentHeaderParameters, BaseModel):
+    pass
+
+
+class Scheduler(BaseAgent, BaseModel):
+    """Class to hold the whole logic for scheduling sub-jobs."""
+
+    header: SchedulerHeaderParameters = SchedulerHeaderParameters()
+
+    class Config:
+        underscore_attrs_are_private = True
+
+    _logger = PrivateAttr()
+    _initialized_steps = PrivateAttr()
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+    def initialize(self):
+        super().initialize()
+
+    def execute(self):
+        # TODO: implement
+        pass
+
+    def _action_prepare(self):
+        pass
+
+    def _action_run(self):
+        pass
+
+    def is_valid(self) -> bool:
+        # TODO: implement
+        pass
+
+    def __repr__(self):
+        return "<Icolos scheduler: id=%s, description=%s>" % (
+            self.get_id(),
+            self.get_description(),
+        )
+
+    def __str__(self):
+        return self.__repr__()
diff --git a/icolos/core/composite_agents/workflow.py b/icolos/core/composite_agents/workflow.py
new file mode 100644
index 0000000..515e6c0
--- /dev/null
+++ b/icolos/core/composite_agents/workflow.py
@@ -0,0 +1,143 @@
+from typing import Dict, List
+
+from pydantic import BaseModel, PrivateAttr
+from icolos.core.containers.perturbation_map import PerturbationMap
+from icolos.core.flow_control.flow_control import FlowControlBase
+from icolos.core.job_control.job_control import StepJobControl
+
+from icolos.core.steps_utils import initialize_step_from_dict
+from icolos.core.workflow_steps.step import StepBase
+from icolos.core.composite_agents.base_agent import BaseAgent, AgentHeaderParameters
+from icolos.utils.enums.step_enums import StepBaseEnum
+
+from icolos.utils.general.icolos_exceptions import get_exception_message
+
+from icolos.utils.enums.logging_enums import LoggingConfigEnum
+from icolos.utils.enums.composite_agents_enums import WorkflowEnum
+
+_WE = WorkflowEnum()
+_LE = LoggingConfigEnum()
+_SBE = StepBaseEnum
+
+
+class WorkflowHeaderParameters(AgentHeaderParameters, BaseModel):
+    pass
+
+
+class WorkflowData(BaseModel):
+    work_dir: str = None
+    perturbation_map: PerturbationMap = None
+
+
+class WorkFlow(BaseAgent, BaseModel):
+    """Class to hold the whole logic for a workflow."""
+
+    steps: List[Dict] = []
+    header: WorkflowHeaderParameters = WorkflowHeaderParameters()
+    workflow_data: WorkflowData = WorkflowData()
+
+    class Config:
+        underscore_attrs_are_private = True
+
+    _logger = PrivateAttr()
+    _initialized_steps = PrivateAttr()
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        self._initialized_steps = []
+
+    def initialize(self):
+        super().initialize()
+        self._initialized_steps = []
+        for step_conf in self.steps:
+            step_conf = self._update_global_variables(conf=step_conf)
+            step = initialize_step_from_dict(step_conf=step_conf)
+            if isinstance(step, StepBase):
+                # we have a normal step, no flow control wrapping
+                step.set_workflow_object(self)
+                self._initialized_steps.append(step)
+            elif isinstance(step, FlowControlBase):
+                # flow control has returned a list of steps, or a single JobControl step
+                if isinstance(step.initialized_steps, list):
+                    for st in step.initialized_steps:
+                        st.set_workflow_object(self)
+                        self._initialized_steps.append(st)
+                elif isinstance(step.initialized_steps, StepJobControl):
+                    # parallelize was set, so a JobControl wrapper is returned; attach
+                    # the workflow object to each of the wrapped steps
+                    for st in step.initialized_steps.initialized_steps:
+                        st.set_workflow_object(self)
+                    self._initialized_steps.append(step.initialized_steps)
+        self._logger.log(
+            f"Initialized {len(self._initialized_steps)} steps in workflow {self.header.id}.",
+            _LE.DEBUG,
+        )
+
+    def execute(self):
+        for step in self._initialized_steps:
+            step.generate_input()
+            self._logger.log(f"Starting execution of step: {step.step_id}", _LE.INFO)
+            step.execute()
+            self._logger.log(
+                f"Processing write-out blocks for {step.step_id}.", _LE.DEBUG
+            )
+            step.process_write_out()
+        self._logger.log(
+            f"Execution of {len(self._initialized_steps)} steps completed.", _LE.INFO
+        )
+
+    def is_valid(self) -> bool:
+        try:
+            for step in self._initialized_steps:
+                step.validate()
+        except Exception as e:
+            self._logger.log(
+                f'During step validation, "WorkFlow" returned the following exception: {get_exception_message(e)}.',
+                _LE.WARNING,
+            )
+            return False
+        return True
+
+    def add_step(self, step: StepBase):
+        self._initialized_steps.append(step)
+
+    def get_steps(self) -> list:
+        return self._initialized_steps
+
+    def find_step_by_step_id(self, step_id: str):
+        for step in self._initialized_steps:
+            if step.step_id == step_id:
+                return step
+            elif step.type == _SBE.STEP_JOB_CONTROL:
+                # the steps themselves are buried in the _initialized_steps
+                # attribute of the JobControl wrapper
+                for st in step.initialized_steps:
+                    if st.step_id == step_id:
+                        return st
+
+        raise IndexError(f"Could not find step with step_id {step_id} in workflow.")
+
+    def __iter__(self):
+        return iter(self.steps)
+
+    def __repr__(self):
+        return "<Icolos workflow: id=%s, description=%s, number of steps: %d>" % (
+            self.get_id(),
+            self.get_description(),
+            len(self),
+        )
+
+    def set_perturbation_map(self, p_map: PerturbationMap) -> None:
+        self.workflow_data.perturbation_map = p_map
+
+    def get_perturbation_map(self) -> PerturbationMap:
+        return self.workflow_data.perturbation_map
+
+    def __str__(self):
+        return self.__repr__()
+
+    def __getitem__(self, key: int):
+        return self._initialized_steps[key]
+
+    def __len__(self) -> int:
+        return len(self._initialized_steps)
diff --git a/icolos/core/containers/__init__.py b/icolos/core/containers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/icolos/core/containers/compound.py b/icolos/core/containers/compound.py
new file mode 100644
index 0000000..b36fc2d
--- /dev/null
+++ b/icolos/core/containers/compound.py
@@ -0,0 +1,549 @@
+from copy import deepcopy
+from typing import List
+from rdkit import Chem
+
+from icolos.utils.enums.compound_enums import (
+    CompoundContainerEnum,
+    EnumerationContainerEnum,
+)
+from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum
+from icolos.core.step_utils.structconvert import StructConvert
+from icolos.utils.general.icolos_exceptions import ContainerCorrupted
+from icolos.utils.enums.write_out_enums import WriteOutEnum
+from typing import Union
+import numpy as np
+import os
+
+_WE = WriteOutEnum()
+_SEE = SchrodingerExecutablesEnum()
+
+
+class Conformer:
+    """This class is a storage class for individual conformers associated with a given Enumeration."""
+
+    def __init__(
+        self,
+        conformer: Chem.Mol = None,
+        conformer_id: int = None,
+        enumeration_object=None,
+    ):
+        self._conformer = conformer
+        self._conformer_id = conformer_id
+        self._enumeration_object = enumeration_object
+        self._extra_data_dictionary = {}
+
+    def get_compound_name(self) -> str:
+        if self.get_enumeration_object() is not None:
+            return self.get_enumeration_object().get_compound_name()
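+
+    # Note: index strings take the form "<compound>:<enumeration>:<conformer>"
+    # (e.g. "0:1:2" for conformer 2 of enumeration 1 of compound 0); see
+    # get_index_string() below and its counterparts on Enumeration and Compound.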
+    def get_index_string(self) -> str:
+        enum_obj = self.get_enumeration_object()
+        enum_str = ""
+        if enum_obj is not None:
+            enum_str = enum_obj.get_index_string()
+        conf_str = ""
+        if self.get_conformer_id() is not None:
+            conf_str = str(self.get_conformer_id())
+        return ":".join([enum_str, conf_str])
+
+    def add_extra_data(self, key: str, data):
+        self._extra_data_dictionary[key] = data
+
+    def get_extra_data(self) -> dict:
+        return self._extra_data_dictionary
+
+    def clear_extra_data(self):
+        self._extra_data_dictionary = {}
+
+    def set_enumeration_object(self, enumeration_object):
+        self._enumeration_object = enumeration_object
+
+    def get_enumeration_object(self):
+        return self._enumeration_object
+
+    def get_molecule(self) -> Chem.Mol:
+        return self._conformer
+
+    def set_molecule(self, conformer: Chem.Mol):
+        self._conformer = conformer
+
+    def set_conformer_id(self, conformer_id: int):
+        self._conformer_id = conformer_id
+
+    def get_conformer_id(self) -> int:
+        return self._conformer_id
+
+    def empty(self) -> bool:
+        if self.get_molecule() is None:
+            return True
+        return False
+
+    def _clone(self):
+        clone = Conformer(
+            conformer=deepcopy(self.get_molecule()),
+            conformer_id=self.get_conformer_id(),
+            enumeration_object=self.get_enumeration_object(),
+        )
+        clone._extra_data_dictionary = deepcopy(self.get_extra_data())
+        return clone
+
+    def __copy__(self):
+        return self._clone()
+
+    def __deepcopy__(self, memo):
+        return self._clone()
+
+    def __repr__(self):
+        parent_enumeration_id = (
+            None
+            if self.get_enumeration_object() is None
+            else self.get_enumeration_object().get_enumeration_id()
+        )
+        return "<Conformer id: %s, linked enumeration: %s>" % (
+            self.get_conformer_id(),
+            parent_enumeration_id,
+        )
+
+    def __str__(self):
+        return self.__repr__()
+
+    def write(self, path: str, format_=_WE.SDF):
+        writer = Chem.SDWriter(path)
+        molecule = self.get_molecule()
+        molecule.SetProp(_WE.RDKIT_NAME, self.get_index_string())
+        molecule.SetProp(_WE.INDEX_STRING, self.get_index_string())
+        writer.write(molecule)
+        writer.close()
+        if format_ == _WE.PDB:
+            pdb_path = path.split(".")[0] + ".pdb"
+            # convert the written SDF file to a PDB with the Schrodinger converter
+            converter = StructConvert(prefix_execution=_SEE.SCHRODINGER_MODULE)
+            converter.sdf2pdb(sdf_file=path, pdb_file=pdb_path)
+            os.remove(path)
+
+    def update_coordinates(self, path: str):
+        old = self.get_molecule()
+        for mol in Chem.SDMolSupplier(path, removeHs=False):
+            mol.SetProp(_WE.RDKIT_NAME, old.GetProp(_WE.RDKIT_NAME))
+            for prop in old.GetPropNames():
+                mol.SetProp(prop, old.GetProp(prop))
+            self.set_molecule(mol)
+
+            # only one molecule is expected at this stage, so stop after the first one
+            break
+        self.write("".join([path, "_out"]))
+
+
+class Enumeration:
+    """This class bundles all information on an enumeration, especially all conformers generated."""
+
+    def __init__(
+        self,
+        compound_object=None,
+        smile: str = "",
+        molecule: Chem.Mol = None,
+        original_smile: str = None,
+        enumeration_id: int = None,
+    ):
+        self._MC = CompoundContainerEnum()
+        self._EC = EnumerationContainerEnum()
+        self._smile = smile
+        self._compound_object = compound_object
+        self._molecule = molecule
+        self._original_smile = original_smile
+        self._enumeration_id = enumeration_id
+        self._conformers = []
+
+    def empty(self) -> bool:
+        if len(self.get_conformers()) == 0:
+            return True
+        return False
+
+    def get_compound_name(self) -> str:
+        if self.get_compound_object() is not None:
+            return self.get_compound_object().get_name()
+    def _get_next_conformer_id(self) -> int:
+        ids = [conf.get_conformer_id() for conf in self.get_conformers()]
+        if len(ids) == 0:
+            return 0
+        else:
+            return max(ids) + 1
+
+    def sort_conformers(
+        self, by_tag: Union[str, List[str]], reverse: bool = True, aggregation="sum"
+    ):
+        conformers = self.get_conformers()
+        if isinstance(by_tag, str):
+            conformers = sorted(
+                conformers,
+                key=lambda x: float(x.get_molecule().GetProp(by_tag)),
+                reverse=reverse,
+            )
+            self._conformers = conformers
+            self.reset_conformer_ids()
+        elif isinstance(by_tag, list):
+            # the values need to be normalised: calculate the max and min of each tag in the series
+            def normalise_tag(value, tag):
+                all_tag_values = [
+                    float(conf.get_molecule().GetProp(tag)) for conf in conformers
+                ]
+                max_tag = np.max(all_tag_values)
+                min_tag = np.min(all_tag_values)
+                return (float(value) - min_tag) / (max_tag - min_tag)
+
+            # if multiple tags are specified, aggregate according to the provided aggregation function
+            if aggregation == "sum":
+                conformers = sorted(
+                    conformers,
+                    key=lambda x: np.sum(
+                        [
+                            float(normalise_tag(x.get_molecule().GetProp(i), i))
+                            for i in by_tag
+                        ]
+                    ),
+                    reverse=reverse,
+                )
+                self._conformers = conformers
+            elif aggregation == "product":
+                conformers = sorted(
+                    conformers,
+                    key=lambda x: np.prod(
+                        [
+                            float(normalise_tag(x.get_molecule().GetProp(i), i))
+                            for i in by_tag
+                        ]
+                    ),
+                    reverse=reverse,
+                )
+                self._conformers = conformers
+            else:
+                raise AttributeError(
+                    "Only sum or product aggregation modes are currently supported - ABORT"
+                )
+
+    def find_conformer(self, conformer_id: int) -> Conformer:
+        conf = [
+            conf
+            for conf in self.get_conformers()
+            if conf.get_conformer_id() == conformer_id
+        ]
+        if len(conf) == 0:
+            raise IndexError(f"Could not find conformer with id {conformer_id}.")
+        elif len(conf) > 1:
+            raise ContainerCorrupted(
+                f"More than one conformer with id {conformer_id} found in the same Enumeration instance (enumeration_id: {self.get_enumeration_id()})."
+            )
+        return conf[0]
+
+    def get_conformer_ids(self) -> List[int]:
+        ids = [conf.get_conformer_id() for conf in self.get_conformers()]
+        return ids
+
+    def reset_conformer_ids(self):
+        for new_id, conf in enumerate(self.get_conformers()):
+            conf.set_conformer_id(conformer_id=new_id)
+
+    def add_conformer(self, conformer: Conformer, auto_update: bool = True):
+        """Add a new conformer. If "auto_update" is True, the enumeration object of the
+        conformer will be set to "self" and the conformer_id will be set to the next free index."""
+        conformer = deepcopy(conformer)
+        if auto_update:
+            conformer.set_enumeration_object(self)
+            conformer.set_conformer_id(self._get_next_conformer_id())
+        self._conformers.append(conformer)
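+
+    # A hypothetical usage sketch for sort_conformers (the tag names are illustrative,
+    # not fixed by this class): sort by two tags, min-max normalised and summed per conformer:
+    #
+    #   enumeration.sort_conformers(
+    #       by_tag=["docking_score", "shape_similarity"],
+    #       reverse=True,
+    #       aggregation="sum",
+    #   )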
If "auto_update" is True, the Enumeration class will be set to "self" and + the conformer_id will be set to the next free index.""" + for conformer in conformers: + self.add_conformer(conformer=conformer, auto_update=auto_update) + + def get_index_string(self) -> str: + comp_obj = self.get_compound_object() + comp_str = "" + if comp_obj is not None: + comp_str = comp_obj.get_index_string() + enum_str = "" + if self.get_enumeration_id() is not None: + enum_str = str(self.get_enumeration_id()) + return ":".join([comp_str, enum_str]) + + def clean_failed_conformers(self): + # all conformers, where the molecule has been set to None by a function can be considered to have failed + for idx in list(reversed(range(len(self._conformers)))): + if self._conformers[idx].get_molecule() is None: + del self._conformers[idx] + self.reset_conformer_ids() + + def clear_molecule(self): + self._molecule = None + + def clear_conformers(self): + self._conformers = [] + + def get_conformers(self) -> List[Conformer]: + return self._conformers + + def clone_conformers(self) -> List[Conformer]: + return [deepcopy(conf) for conf in self._conformers] + + def set_compound_object(self, compound_object): + self._compound_object = compound_object + + def get_compound_object(self): + return self._compound_object + + def set_enumeration_id(self, enumeration_id: int): + self._enumeration_id = enumeration_id + + def get_enumeration_id(self) -> int: + return self._enumeration_id + + def set_smile(self, smile: str): + self._smile = smile + + def get_smile(self) -> str: + return self._smile + + def set_molecule(self, molecule: Chem.Mol): + self._molecule = molecule + + def get_molecule(self) -> Chem.Mol: + return self._molecule + + def set_original_smile(self, original_smile: str): + self._original_smile = original_smile + + def get_original_smile(self) -> str: + return self._original_smile + + def _clone(self): + clone = Enumeration( + compound_object=self.get_compound_object(), + smile=self.get_smile(), + molecule=deepcopy(self.get_molecule()), + original_smile=self.get_original_smile(), + enumeration_id=self.get_enumeration_id(), + ) + for conf in self.get_conformers(): + conf = deepcopy(conf) + conf.set_enumeration_object(enumeration_object=clone) + clone.add_conformer(conf, auto_update=False) + return clone + + def __copy__(self): + return self._clone() + + def __deepcopy__(self, memo): + return self._clone() + + def __repr__(self): + parent_compound_id = ( + None + if self.get_compound_object() is None + else self.get_compound_object().get_compound_number() + ) + return "" % ( + self.get_enumeration_id(), + self.get_smile(), + parent_compound_id, + len(self._conformers), + ) + + def __str__(self): + return self.__repr__() + + def __iter__(self): + return iter(self._conformers) + + def __getitem__(self, key: int) -> Conformer: + return self._conformers[key] + + def __len__(self) -> int: + return len(self.get_conformers()) + + +class Compound: + """This class bundles all information on a molecule and serves mainly to group enumerations.""" + + def __init__(self, name: str = "", compound_number: int = None): + self._CC = CompoundContainerEnum() + self._EC = EnumerationContainerEnum() + self._name = name + self._compound_number = compound_number + self._enumerations = [] + + def __repr__(self): + return "" % ( + self.get_name(), + self.get_compound_number(), + len(self.get_enumerations()), + ) + + def __str__(self): + return self.__repr__() + + def get_index_string(self) -> str: + if self.get_compound_number() is not None: + 
+            return str(self.get_compound_number())
+        else:
+            return ""
+
+    def set_name(self, name: str):
+        self._name = name
+
+    def get_name(self) -> str:
+        return self._name
+
+    def set_compound_number(self, compound_number: int):
+        self._compound_number = compound_number
+
+    def get_compound_number(self) -> int:
+        return self._compound_number
+
+    def add_enumeration(self, enumeration: Enumeration, auto_update: bool = True):
+        """Add a new enumeration. If "auto_update" is True, the compound object of the
+        enumeration will be set to "self" and the enumeration_id will be set to the next free index."""
+        enumeration = deepcopy(enumeration)
+        if auto_update:
+            enumeration.set_compound_object(self)
+            enumeration.set_enumeration_id(self._get_next_enumeration_id())
+        self._enumerations.append(enumeration)
+
+    def add_enumerations(
+        self, enumerations: List[Enumeration], auto_update: bool = True
+    ):
+        """Add new enumerations. If "auto_update" is True, the compound object of each
+        enumeration will be set to "self" and the enumeration_id will be set to the next free index."""
+        for enumeration in enumerations:
+            self.add_enumeration(enumeration=enumeration, auto_update=auto_update)
+
+    def clear_enumerations(self):
+        self._enumerations = []
+
+    def get_enumerations(self) -> List[Enumeration]:
+        return self._enumerations
+
+    def _clone(self):
+        clone = Compound(
+            name=self.get_name(), compound_number=self.get_compound_number()
+        )
+        for enum in self.get_enumerations():
+            enum = deepcopy(enum)
+            enum.set_compound_object(compound_object=clone)
+            clone.add_enumeration(enum, auto_update=False)
+        return clone
+
+    def __iter__(self):
+        return iter(self._enumerations)
+
+    def __copy__(self):
+        return self._clone()
+
+    def __deepcopy__(self, memo):
+        return self._clone()
+
+    def __getitem__(self, key: int) -> Enumeration:
+        return self._enumerations[key]
+
+    def __len__(self) -> int:
+        return len(self.get_enumerations())
+
+    def _get_next_enumeration_id(self):
+        ids = [enum.get_enumeration_id() for enum in self.get_enumerations()]
+        if len(ids) == 0:
+            return 0
+        else:
+            return max(ids) + 1
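+
+    # A hypothetical construction sketch (all values illustrative); "rdkit_mol"
+    # stands in for any rdkit.Chem.Mol instance:
+    #
+    #   compound = Compound(name="mol1", compound_number=0)
+    #   enumeration = Enumeration(smile="CCO")
+    #   enumeration.add_conformer(Conformer(conformer=rdkit_mol))
+    #   compound.add_enumeration(enumeration)
+    #   compound[0][0].get_index_string()  # -> "0:0:0"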
+    def find_enumeration(self, enumeration_id: int) -> Enumeration:
+        enum = [
+            enum
+            for enum in self.get_enumerations()
+            if enum.get_enumeration_id() == enumeration_id
+        ]
+        if len(enum) == 0:
+            raise IndexError(f"Could not find enumeration with id {enumeration_id}.")
+        elif len(enum) > 1:
+            raise ContainerCorrupted(
+                f"More than one enumeration with id {enumeration_id} found in the same Compound instance (compound_number: {self.get_compound_number()})."
+            )
+        return enum[0]
+
+    def get_enumeration_ids(self) -> List[int]:
+        ids = [enum.get_enumeration_id() for enum in self.get_enumerations()]
+        return ids
+
+    def reset_enumeration_ids(self):
+        for new_id, enum in enumerate(self.get_enumerations()):
+            enum.set_enumeration_id(enumeration_id=new_id)
+
+    def reset_all_ids(self):
+        self.reset_enumeration_ids()
+        for enum in self.get_enumerations():
+            enum.reset_conformer_ids()
+
+    def update_all_relations(self):
+        for enum in self.get_enumerations():
+            enum.set_compound_object(self)
+            for conf in enum.get_conformers():
+                conf.set_enumeration_object(enum)
+
+    def empty(self) -> bool:
+        if len(self.get_enumerations()) == 0:
+            return True
+        return False
+
+    def unroll_conformers(self) -> List[Conformer]:
+        conformers = []
+        for enum in self.get_enumerations():
+            # guard against empty enumerations that might be used when constructing more complex data flows
+            if enum.empty():
+                continue
+            for conf in enum.get_conformers():
+                conformers.append(conf)
+        return conformers
+
+
+# TODO: replace these helper functions with a wrapper object
+def get_compound_by_id(compounds: List[Compound], id: int) -> Compound:
+    for compound in compounds:
+        if compound.get_compound_number() == id:
+            return compound
+    raise ValueError(
+        f"Could not find compound with id {id} in list of length {len(compounds)}."
+    )
+
+
+def get_compound_by_name(compounds: List[Compound], name: str) -> Compound:
+    for compound in compounds:
+        if compound.get_name() == name:
+            return compound
+    raise ValueError(
+        f"Could not find compound with name {name} in list of length {len(compounds)}."
+    )
+
+
+def unroll_conformers(compounds: List[Compound]) -> List[Conformer]:
+    all_conformers = []
+    for comp in compounds:
+        all_conformers = all_conformers + comp.unroll_conformers()
+    return all_conformers
+
+
+def unroll_enumerations(compounds: List[Compound]) -> List[Enumeration]:
+    all_enumerations = []
+    for comp in compounds:
+        all_enumerations = all_enumerations + comp.get_enumerations()
+    return all_enumerations
diff --git a/icolos/core/containers/generic.py b/icolos/core/containers/generic.py
new file mode 100644
index 0000000..51bee70
--- /dev/null
+++ b/icolos/core/containers/generic.py
@@ -0,0 +1,210 @@
+from shutil import copyfile
+from distutils.dir_util import copy_tree
+import json
+import os
+import sys
+from typing import Any, List, Dict, Union
+from copy import Error
+
+
+class GenericData:
+    """Container class to hold generic data of any file type"""
+
+    def __init__(
+        self,
+        file_name: str,
+        file_data=None,
+        argument=True,
+        file_id: int = None,
+        extension: str = None,
+    ):
+        self._extension = (
+            extension if extension is not None else file_name.split(".")[-1]
+        )
+        self._file_name = file_name
+        self._file_data = file_data
+        self._file_id = file_id
+        # self._argument: bool = argument
+        self._file_size = self.calculate_file_size()
+
+    def get_file_name(self) -> str:
+        return self._file_name
+
+    def get_data(self) -> Any:
+        return self._file_data
+
+    def calculate_file_size(self):
+        return sys.getsizeof(self._file_data)
+
+    def get_extension(self):
+        return self._extension
+
+    def set_data(self, data):
+        self._file_data = data
+
+    def set_file_name(self, file_name):
+        self._file_name = file_name
+
+    def set_id(self, file_id):
+        self._file_id = file_id
+
+    def get_id(self):
+        return self._file_id
+
+    def set_extension(self, extension):
+        self._extension = extension
+
+    def write(self, path: str, join: bool = True, final_writeout: bool = False):
+        """
+        Handles all I/O operations for generic data; supports handling directories and symlinks.
+        """
+        orig_path = path
+        if join:
+            path = os.path.join(path, self.get_file_name())
+
+        if str(self._file_data).startswith("/"):
+            # file data is a path: copy the file to the destination;
+            # if it is a file, it is stored like this because it is large (> 2GB)
+            if os.path.isfile(self._file_data):
+                if not final_writeout:
+                    # if this is a write-out to a step, we can simply create a symlink
+                    os.symlink(self._file_data, path, target_is_directory=False)
+                else:
+                    # we cannot do this for the final write-out since /scratch or /tmp will eventually get cleaned
+                    copyfile(self._file_data, path)
+
+            elif os.path.isdir(self._file_data):
+                # copy the entire directory to the parent dir
+                copy_tree(self._file_data, orig_path)
+        elif isinstance(self._file_data, list):
+            with open(path, "w") as f:
+                f.writelines(self._file_data)
+
+        elif isinstance(self._file_data, str):
+            with open(path, "w") as f:
+                f.write(self._file_data)
+        elif isinstance(self._file_data, dict):
+            with open(path, "w") as f:
+                f.write(json.dumps(self._file_data))
+        else:
+            with open(path, "wb") as f:
+                f.write(self._file_data)
+
+    def update_data(self, data):
+        if sys.getsizeof(data) != self._file_size:
+            self.set_data(data)
+
+    def __repr__(self):
+        return f"GenericData object - name: {self._file_name}, extension: {self._extension}."
+
+    def __str__(self):
+        return self.__repr__()
+
+
+class GenericContainer:
+    """Container class to hold the instances of the GenericData class, separated by extension"""
+
+    def __init__(self):
+        self._file_dict: Dict[str, List] = {}
+
+    def add_file(self, file: GenericData):
+        ext = file.get_extension()
+        file.set_id(self.get_next_file_id(ext))
+        try:
+            self._file_dict[ext].append(file)
+        except KeyError:
+            self._initialise_list(ext)
+            self._file_dict[ext].append(file)
+
+    def _initialise_list(self, ext):
+        self._file_dict[ext] = []
+
+    def get_next_file_id(self, ext):
+        ids = [file.get_id() for file in self.get_files_by_extension(ext)]
+        if len(ids) == 0:
+            return 0
+        else:
+            return max(ids) + 1
+
+    def get_file_by_index(self, index):
+        for file in self.get_flattened_files():
+            if file.get_id() == index:
+                return file
+
+    def add_files(self, files: List[GenericData]):
+        extensions = list(set([f.get_extension() for f in files]))
+        if len(extensions) > 1:
+            raise Error("Cannot have more than one type of file")
+        else:
+            if extensions[0] in self._file_dict.keys():
+                self._file_dict[extensions[0]].extend(files)
+            else:
+                self._file_dict[extensions[0]] = files
+
+    def get_all_files(self) -> Dict[str, List]:
+        return self._file_dict
+
+    def get_files_by_extension(self, ext: str) -> List[GenericData]:
+        if ext in self._file_dict.keys():
+            return self._file_dict[ext]
+        else:
+            self._initialise_list(ext)
+            return self._file_dict[ext]
+
+    def get_file_names_by_extension(self, ext: str):
+        try:
+            return [f.get_file_name() for f in self._file_dict[ext]]
+        except KeyError:
+            self._initialise_list(ext)
+            return [f.get_file_name() for f in self._file_dict[ext]]
+
+    def get_file_types(self):
+        return self._file_dict.keys()
+
+    def get_flattened_files(self) -> List[GenericData]:
+        rtn_files = []
+        for key in self._file_dict.keys():
+            for file in self._file_dict[key]:
+                rtn_files.append(file)
+        return rtn_files
+
+    def get_file_by_name(self, name):
+        for file in self.get_flattened_files():
+            if file.get_file_name() == name:
+                return file
+
+    def clear_file_dict(self):
+        self._file_dict = {}
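+
+    # A hypothetical usage sketch (file names and contents are illustrative):
+    #
+    #   container = GenericContainer()
+    #   container.add_file(GenericData(file_name="topol.top", file_data="..."))
+    #   container.add_file(GenericData(file_name="conf.gro", file_data="..."))
+    #   container.get_file_names_by_extension("top")  # -> ["topol.top"]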
+    def get_argument_by_extension(
+        self, ext, rtn_file_object=False
+    ) -> Union[GenericData, str]:
+        files = []
+        for file in self.get_flattened_files():
+            if file.get_extension() == ext:
+                files.append(file)
+        try:
+            assert len(files) == 1
+        except AssertionError:
+            print(
+                f"Found multiple files with extension {ext}, select the index of the file to be passed as an argument\n"
+            )
+            print("######################")
+            for idx, file in enumerate(files):
+                print(f"{idx}: {file.get_file_name()}")
+            print("######################")
+            index = input(">>> ")
+            files = [files[int(index)]]
+
+        if not rtn_file_object:
+            return files[0].get_file_name()
+        else:
+            return files[0]
+
+    def write_out_all_files(self, folder):
+        """Flattens all files in the container and writes them to the specified directory."""
+        for file in self.get_flattened_files():
+            file.write(folder)
diff --git a/icolos/core/containers/perturbation_map.py b/icolos/core/containers/perturbation_map.py
new file mode 100644
index 0000000..da62103
--- /dev/null
+++ b/icolos/core/containers/perturbation_map.py
@@ -0,0 +1,294 @@
+from typing import Dict, List, Optional
+from IPython.lib.display import IFrame
+import pandas as pd
+from icolos.core.containers.compound import Compound, Conformer, Enumeration
+from pyvis.network import Network
+from icolos.core.containers.generic import GenericData
+
+from icolos.utils.enums.step_enums import StepFepPlusEnum
+import os
+from pydantic import BaseModel
+
+
+_SFE = StepFepPlusEnum()
+
+
+class Node(BaseModel):
+    """
+    Container class for the nodes; a wrapper around a Compound object.
+    """
+
+    class Config:
+        arbitrary_types_allowed = True
+
+    node_id: str = None
+    node_hash: str = None
+    conformer: Conformer = Conformer()
+    node_connectivity: List = []
+
+    def __init__(self, **data) -> None:
+        super().__init__(**data)
+
+    def get_node_id(self) -> str:
+        return self.node_id
+
+    def get_node_color(self):
+        # TODO: expand this so we have different colours for each connectivity number [1,10];
+        # this is just a placeholder for now
+        thresholds = {i: "c0affe" for i in range(10)}
+
+        num_connections = len(self.node_connectivity)
+        return thresholds[num_connections]
+
+    def set_node_id(self, node_id: str):
+        self.node_id = node_id
+
+    def get_conformer(self) -> Conformer:
+        return self.conformer
+
+    def set_conformer(self, conformer: Conformer) -> None:
+        self.conformer = conformer
+
+    def get_node_hash(self) -> str:
+        return self.node_hash
+
+    # TODO: add methods here to access connectivity and color attributes
+
+
+class Edge(BaseModel):
+    """
+    Simple container class for the edges in the perturbation map, specified entirely by the connected nodes.
+    """
+
+    class Config:
+        arbitrary_types_allowed = True
+
+    node_from: Node = Node()
+    node_to: Node = Node()
+    total: str = None
+    mcs: str = None
+    chg: str = None
+    softbond: str = None
+    min_no_atoms: str = None
+    snapCoreRmsd: str = None
+    bidirSnapCoreRmsd: str = None
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+    def get_source_node_name(self):
+        return self.node_from.get_node_hash()
+
+    def get_destination_node_name(self):
+        return self.node_to.get_node_hash()
+
+    def get_edge_id(self) -> str:
+        # construct the edge ID from the node hashes, separated by '_'
+        return f"{self.node_from.get_node_hash()}_{self.node_to.get_node_hash()}"
+
+
+class PerturbationMap(BaseModel):
+    """Holds a perturbation map parsed from a file (typically a parsed Schrodinger log
+    file) and provides utility methods for running pmx calculations on the edges."""
the edges""" + + class Config: + arbitrary_types_allowed = True + + nodes: List[Node] = [] + edges: List[Edge] = [] + hash_map: Dict = {} + compounds: List[Compound] = [] + protein: GenericData = None + vmap_output: IFrame = None + replicas: int = 3 + + def __init__(self, **data) -> None: + super().__init__(**data) + + def _get_line_idx(self, data, id_str) -> int: + line = [e for e in data if id_str in e] + assert len(line) == 1 + line = line[0] + return data.index(line) + + def _get_conformer_by_id(self, comp_id: str) -> Optional[Conformer]: + # get the compund object based on the ID in the ligand table (compound names). At this stage in the workflow we have only one conformer per enumeration + try: + # standard icolos naming conventino + parts = comp_id.split(":") + compound_id = parts[0] + enumeration_id = parts[1] + except: + # a non-standard compound name has been used + compound_id = comp_id + for compound in self.compounds: + if compound.get_name().split(":")[0] == compound_id: + rtn_compound = compound + enums = rtn_compound.get_enumerations() + + if len(enums) == 1: + # easy case, there is only one enumeration, return it's single conformer + + # at this stage, the docking poses must have been filtered to a single entry + # per enumeration (an enumerations should have been filtered on charge state etc.) + return enums[0].get_conformers()[0] + else: + # multiple enumerations, must be using Icolos naming or we cannot infer which + # enumeration should be used + enum = rtn_compound.find_enumeration( + enumeration_id=int(enumeration_id) + ) + return enum.get_conformers()[0] + + # TODO: Remove this before integration + print( + f"Compound with id {compound_id} was not found in the map - it may have been lost during map construction" + ) + + def parse_map_file(self, file_path) -> None: + # we need to do some format enforcement here (schrodinger or otherwise) + + with open(file_path, "r") as f: + data = f.readlines() + + start_edge = self._get_line_idx(data, _SFE.EDGE_HEADER_LINE) + start_node = self._get_line_idx(data, _SFE.NODE_HEADER_LINE) + stop_node = self._get_line_idx(data, _SFE.SIMULATION_PROTOCOL) + edge_info_start = self._get_line_idx(data, _SFE.SIMILARITY) + + # TODO: refactor that + # clean up the data from schrodinger + split_data = [] + for line in data: + split_line = line.split(" ") + stripped_line = [] + for element in split_line: + if not element.isspace() and element: + stripped_line.append(element.strip()) + split_data.append(stripped_line) + + data = split_data + + map_info = pd.DataFrame( + data[start_edge + 3 : start_node - 1], + index=None, + columns=[ + "Short ID", + "ligand1 -> ligand2", + "Bennett ddG", + "Cycle Closure ddG", + "Complex dG", + "Solvent dG", + ], + ) + node_info = pd.DataFrame( + data[start_node + 3 : stop_node - 1], + index=None, + columns=[ + "hash_id", + "node_id", + "Predicted dG", + "Experimental dG", + "Predicted Solvation dG", + "Experimental Solvation dG", + ], + ) + edge_info = pd.DataFrame( + data[edge_info_start + 3 : -1], + columns=[ + "Short ID", + "Total", + "Mcs", + "Charge", + "SoftBond", + "MinimumNumberOfAtom", + "SnapCoreRmsd", + "BidirectionSnapCore", + ], + ).dropna() + for hash_id, node_id in zip(node_info["hash_id"], node_info["node_id"]): + # map the hashes to the compound IDs + self.hash_map[hash_id] = node_id + node = Node( + node_id=node_id, + node_hash=hash_id, + conformer=self._get_conformer_by_id(node_id), + ) + # generate the Node object to wrap the compound + self.nodes.append(node) + + for _, edge in 
+        for _, edge in edge_info.iterrows():
+            edge = Edge(
+                node_from=self._get_node_by_hash_id(edge[0].split("_")[0]),
+                node_to=self._get_node_by_hash_id(edge[0].split("_")[1]),
+                total=edge[1],
+                mcs=edge[2],
+                chg=edge[3],
+                softbond=edge[4],
+                min_no_atoms=edge[5],
+                snapCoreRmsd=edge[6],
+                bidirSnapCoreRmsd=edge[7],
+            )
+            self.edges.append(edge)
+        # process the node info: attach the connectivity to each node
+        for node in self.nodes:
+            self._attach_node_connectivity(node)
+
+    def _attach_node_connectivity(self, node: Node):
+        # looks through the constructed edges and attaches the ids of any edges
+        # that have the specified node as one endpoint
+        connected_edges = []
+        for edge in self.edges:
+            if (
+                edge.node_from.get_node_hash() == node.node_hash
+                or edge.node_to.get_node_hash() == node.node_hash
+            ):
+                connected_edges.append(edge.get_edge_id())
+        node.node_connectivity = connected_edges
+
+    def _get_node_by_node_id(self, node_id: str) -> Node:
+        for node in self.nodes:
+            if node.node_id == node_id:
+                return node
+
+    def _get_node_by_hash_id(self, hash_id: str) -> Node:
+        for node in self.nodes:
+            if node.node_hash == hash_id:
+                return node
+
+    def get_edges(self) -> List[Edge]:
+        return self.edges
+
+    def get_nodes(self) -> List[Node]:
+        return self.nodes
+
+    def visualise_perturbation_map(self, write_out_path: str) -> None:
+        """Method for visualising the map with pyvis.Network."""
+        vmap = Network(directed=True)
+        vmap.barnes_hut()
+
+        for edge in self.edges:
+            vmap.add_node(
+                edge.get_source_node_name(), color=edge.node_from.get_node_color()
+            )
+            vmap.add_node(
+                edge.get_destination_node_name(), color=edge.node_to.get_node_color()
+            )
+            vmap.add_edge(
+                source=edge.get_source_node_name(),
+                to=edge.get_destination_node_name(),
+                length=edge.total,
+                label="total: " + str(edge.total),
+                title="Mcs: " + str(edge.mcs) + ", SnapCoreRMSD: " + str(edge.snapCoreRmsd),
+            )
+        self.vmap_output = vmap.show(os.path.join(write_out_path, "vmap.html"))
+
+    def get_protein(self) -> GenericData:
+        return self.protein
+
+    def __repr__(self) -> str:
+        return f"Icolos Perturbation Map object containing {len(self.edges)} edges and {len(self.nodes)} nodes"
+
+    def __str__(self) -> str:
+        return self.__repr__()
diff --git a/icolos/core/flow_control/__init__.py b/icolos/core/flow_control/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/icolos/core/flow_control/flow_control.py b/icolos/core/flow_control/flow_control.py
new file mode 100644
index 0000000..32377b9
--- /dev/null
+++ b/icolos/core/flow_control/flow_control.py
@@ -0,0 +1,68 @@
+from typing import List
+from pydantic import BaseModel, PrivateAttr
+from icolos.core.workflow_steps.step import StepSettingsParameters
+from icolos.core.workflow_steps.step import StepBase
+from icolos.loggers.steplogger import StepLogger
+from icolos.core.workflow_steps.step import (
+    StepData,
+    StepInputParameters,
+    StepWriteoutParameters,
+    StepExecutionParameters,
+)
+from icolos.utils.enums.step_enums import StepBaseEnum
+from icolos.utils.enums.step_initialization_enum import StepInitializationEnum
+from icolos.utils.general.convenience_functions import nested_get
+
+_SIE = StepInitializationEnum()
+
+
+class BaseStepConfig(BaseModel):
+    """
+    Minimal template class for the base config, without the additional machinery that StepBase carries.
+    """
+
+    step_id: str = None
+    work_dir: str = None
+    type: str = None
+    data: StepData = StepData()
+    input: StepInputParameters = StepInputParameters()
+    writeout: List[StepWriteoutParameters] = []
+    execution: StepExecutionParameters = StepExecutionParameters()
+    settings: StepSettingsParameters = StepSettingsParameters()
+
+    def _as_dict(self):
+        return {
+            "step_id": self.step_id,
+            "type": self.type,
+            "execution": self.execution,
+            "settings": self.settings,
+            "work_dir": self.work_dir,
+            "data": self.data,
+            "input": self.input,
+            "writeout": self.writeout,
+        }
+
+
+class FlowControlBase(BaseModel):
+    # list of steps to be iterated over; each set needs its inputs chained together
+    base_config: List[BaseStepConfig] = None
+    initialized_steps: List[StepBase] = None
+    _logger = PrivateAttr()
+
+    def __init__(self, **data) -> None:
+        super().__init__(**data)
+        self._logger = StepLogger()
+
+    def _initialize_step_from_dict(self, step_conf: dict):
+        # TODO: check if this overlaps with the other "initialize_step_from_dict" method;
+        # a separate initialisation method is required here to avoid a circular import
+        _STE = StepBaseEnum
+
+        step_type = nested_get(step_conf, _STE.STEP_TYPE, default=None)
+        step_type = None if step_type is None else step_type.upper()
+        if step_type in _SIE.STEP_INIT_DICT.keys():
+            return _SIE.STEP_INIT_DICT[step_type](**step_conf)
+        else:
+            raise ValueError(
+                f"Backend for step {nested_get(step_conf, _STE.STEPID, '')} unknown."
+            )
diff --git a/icolos/core/flow_control/iterator.py b/icolos/core/flow_control/iterator.py
new file mode 100644
index 0000000..63781fa
--- /dev/null
+++ b/icolos/core/flow_control/iterator.py
@@ -0,0 +1,223 @@
+from typing import Dict, List, Union
+from pydantic import BaseModel
+
+from icolos.core.flow_control.flow_control import BaseStepConfig, FlowControlBase
+from copy import deepcopy
+from icolos.core.job_control.job_control import StepJobControl
+from icolos.core.workflow_steps.step import _LE
+from icolos.utils.enums.step_enums import StepBaseEnum
+from icolos.core.workflow_steps.step import StepBase
+from icolos.utils.enums.step_enums import IteratorEnum
+import os
+
+_IE = IteratorEnum
+_SBE = StepBaseEnum
+
+
+class IterSettingsParameters(BaseModel):
+    # unpacked version of StepSettingsParameters
+    flags: List = []
+    parameters: Dict = {}
+    additional: Dict = {}
+
+
+class IterParallelizer(BaseModel):
+    # if True, the steps must be totally independent; the iterator step then hands
+    # the generated copies over to a StepJobControl instance for parallel execution
+    parallelize: bool = False
+    cores: int = 1
+    dependent_steps: int = None
+
+
+class IterSettings(BaseModel):
+    # dictionary of settings to change per iteration,
+    # of the form {step_id: IterSettingsParameters}
+    settings: Dict[str, IterSettingsParameters] = {}
+    iter_mode: _IE = _IE.N_ITERS
+    n_iters: int = None
+    parallelizer_settings: IterParallelizer = IterParallelizer()
+
+
+class StepIterator(FlowControlBase, BaseModel):
+    """
+    Implements the iterator mechanism:
+    wraps one or multiple steps and generates n copies of that set of steps according to the iterator settings.
+    Becomes the master job when parallelize=True, using Icolos JobControl to interface with external resources.
+    """
+
+    # holds the dict of iterables for the bits to change
+    iter_settings: IterSettings = IterSettings()
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        # when init_step_from_dict calls this method, we need to initialise a list of steps,
+        # controlled by iter_settings.iter_mode: either generate a list (serial execution),
+        # or initialize a single JobControl step with each config as an initialized step
+        self.initialized_steps = self._initialize_steps()
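+
+    # A hypothetical config sketch (key names follow the models above, all values
+    # illustrative): three repeats of the wrapped steps, run as three parallel queues:
+    #
+    #   "iter_settings": {
+    #       "n_iters": 3,
+    #       "parallelizer_settings": {
+    #           "parallelize": true, "cores": 3, "dependent_steps": 2
+    #       }
+    #   }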
+    def _initialize_n_iters(self) -> List:
+        """
+        Initialise n identical copies of the same step config.
+        """
+        init_steps = []
+        for i in range(self.iter_settings.n_iters):
+
+            list_step_conf = deepcopy(self.base_config)
+
+            # hand all steps over to the config updater
+            formatted_confs = self._update_config(list_step_conf, f"run_{i}")
+            for step_conf in formatted_confs:
+                initialized_step = self._initialize_step_from_dict(step_conf._as_dict())
+                init_steps.append(initialized_step)
+        return init_steps
+
+    def _modify_settings(self, settings, step_config, i: int):
+        base_conf = deepcopy(step_config)
+        iter_settings = deepcopy(settings)
+        # apply the i-th entry of each settings list for this iteration
+        if iter_settings.flags:
+            base_conf.settings.arguments.flags.append(iter_settings.flags[i])
+        for (
+            key,
+            val,
+        ) in iter_settings.parameters.items():
+            base_conf.settings.arguments.parameters[key] = val[i]
+        for (
+            key,
+            val,
+        ) in iter_settings.additional.items():
+            # each value holds a list with one entry per iteration
+            base_conf.settings.additional[key] = val[i]
+
+        return base_conf
+
+    def _initialize_single(self) -> List:
+        """
+        Iterate through all settings step-wise, changing all setting blocks simultaneously and
+        returning n initialised step sets for n iterations.
+        """
+        init_steps = []
+        for i in range(self.iter_settings.n_iters):
+
+            # iterate over the steps in the base config, and the corresponding settings, if these are to be modified
+            step_sublist = []
+            for step_config in self.base_config:
+
+                # check if we need to iterate through settings in this step, else just use the base config
+                if step_config.step_id in self.iter_settings.settings.keys():
+                    settings = self.iter_settings.settings[step_config.step_id]
+                    step_sublist.append(self._modify_settings(settings, step_config, i))
+                else:
+                    step_sublist.append(step_config)
+
+            # update all configs with references to updated step_ids etc.
+            formatted_configs = self._update_config(step_sublist, f"run_{i}")
+            for step_conf in formatted_configs:
+                initialized_step = self._initialize_step_from_dict(step_conf._as_dict())
+                init_steps.append(initialized_step)
+        return init_steps
+
+    # def _initialize_compounds(self):
+    #     """
+    #     Generates n copies of a step, each with a single compound loaded from the source step
+    #     * Only the first step in base_config needs updating, downstream data handover from this step is handled properly anyway
+    #     """
+    #     init_steps = []
+    #     # TODO: get the number of compounds automatically?
+    #     for i in range(self.iter_settings.n_iters):
+    #         list_step_conf = deepcopy(self.base_config)
+    #         first_step_config = list_step_conf[0]
+    #         # probably only expecting one set of input compounds but this will select the ith for all inputs
+    #         for inp_block in first_step_config.input.compounds:
+    #             inp_block.selected_compound_id = i
+    #         formatted_confs = self._update_config(list_step_conf, f"run_{i}")
+    #         for step_conf in formatted_confs:
+    #             initialized_step = self._initialize_step_from_dict(step_conf.as_dict())
+    #             init_steps.append(initialized_step)
+    #     return init_steps
+
+    def _initialize_steps(self) -> Union[List, StepBase]:
+        """
+        Handle step initialisation according to the config.
+        Returns a list of steps for serial execution (the default), or a Step-like
+        JobControl object if parallelisation is specified.
+        """
+ """ + steps = [] + if self.iter_settings.iter_mode == _IE.N_ITERS: + # simplest mode, just n repeats of the same step + steps += self._initialize_n_iters() + + elif self.iter_settings.iter_mode == _IE.SINGLE: + # for n different settings, iterate through each, returning n steps + steps += self._initialize_single() + elif self.iter_settings.iter_mode == _IE.ALL: + raise NotImplementedError + # initialise all combinations of steps by combining settings + # steps.append(self._initialize_combined()) + + self._logger.log( + f"Iterator has initialized {len(steps)} steps for step {self.base_config[0].step_id}", + _LE.DEBUG, + ) + if not self.iter_settings.parallelizer_settings.parallelize: + return steps + else: + + wrapper = StepJobControl( + step_id="JobControl", + type=_SBE.STEP_JOB_CONTROL, + initialized_steps=steps, + parallel_execution=self.iter_settings.parallelizer_settings, + ) + return wrapper + + def _update_config( + self, step_conf: List[BaseStepConfig], run_id: str + ) -> List[BaseStepConfig]: + """ + Manages modifications to each step in the config: + * step_id is updated with the run_id + * any references to other step_ids (e.g. in input) contained in the base config are updated to reflect the change + * writeout paths are updated to separate output from each of the runs + """ + original_step_ids = [conf.step_id for conf in step_conf] + formatted_confs = [] + for conf in step_conf: + # modify the step_id + st_id = conf.step_id + conf.step_id = st_id + "_" + run_id + # modify the writeout paths: add a key_value dir the writeout path + for idx, block in enumerate(conf.writeout): + if block.destination.resource is not None: + resource = block.destination.resource + parts = resource.split("/") + new_resource = os.path.join("/".join(parts[:-1]), run_id, parts[-1]) + block.destination.resource = new_resource + + # modify the step_input blocks if they reference a step_id contained in step_conf + # treat compounds + for comp in conf.input.compounds: + if comp.source in original_step_ids: + comp.source += f"_{run_id}" + + for gen in conf.input.generic: + if gen.source in original_step_ids: + gen.source += f"_{run_id}" + + # TODO: this is a bodge for now + # we have an edge case in data manipularion that needs to match compounds those from another step, the source name needs the same treatment + if conf.type.upper() == _SBE.STEP_DATA_MANIPULATION: + if ( + _SBE.INPUT_SOURCE in conf.settings.additional.keys() + and conf.settings.additional[_SBE.INPUT_SOURCE] in original_step_ids + ): + + conf.settings.additional[_SBE.INPUT_SOURCE] += f"_{run_id}" + + formatted_confs.append(conf) + + return formatted_confs diff --git a/icolos/core/job_control/__init__.py b/icolos/core/job_control/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/job_control/job_control.py b/icolos/core/job_control/job_control.py new file mode 100644 index 0000000..5d4cf68 --- /dev/null +++ b/icolos/core/job_control/job_control.py @@ -0,0 +1,89 @@ +from typing import List +from pydantic.main import BaseModel + +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.general.parallelization import Parallelizer, SubtaskContainer +from icolos.core.workflow_steps.step import _LE + + +class IterParallelizer(BaseModel): + # config block controlling how the steps are parallelized + # if you are executing a 5 step workflow with 10 repeats, dependent_steps = 5, cores = 10 + # this will allow each independent replica to be allocated to a single job queue, retaining step order + 
parallelize: bool = False + cores: int = 1 + dependent_steps: int = None + + +class StepJobControl(StepBase, BaseModel): + """ + Step class containing the job control functionality required for StepIterator; supports Slurm for job scheduling. + Supports running the Icolos process as a master job for parallel step execution on a cluster. Generates a pool of initialized steps to be executed, based on the parallel_execution settings handed over from flow control. + """ + + initialized_steps: List = [] + # expect the parallel execution block to be handed over from flow control + parallel_execution: IterParallelizer = IterParallelizer() + + def __init__(self, **data): + super().__init__(**data) + + def _prepare_batch(self, batch) -> List[List[StepBase]]: + + batch_steps = [] + for sublist in batch: + sublist_steps = [] + for task in sublist: + sublist_steps.append(task.data) + batch_steps.append(sublist_steps) + return batch_steps + + def execute(self): + """ + Execute multiple steps in parallel + """ + # Spin up multiple processes + self.execution.parallelization.cores = self.parallel_execution.cores + # each subtask needs to contain an entire mini workflow to be executed sequentially, + self.execution.parallelization.max_length_sublists = ( + self.parallel_execution.dependent_steps + ) + + # steps are only tried once: with multiple tries, a step can fail simply because a step + # it depends on failed earlier in the chain - too complicated to handle + self._subtask_container = SubtaskContainer(max_tries=1) + self._subtask_container.load_data(self.initialized_steps) + + parallelizer = Parallelizer(func=self._run_step) + n = 1 + + while self._subtask_container.done() is False: + + next_batch = self._get_sublists( + get_first_n_lists=self.parallel_execution.cores + ) # return n lists of length max_sublist_length + _ = [sub.increment_tries() for element in next_batch for sub in element] + _ = [sub.set_status_failed() for element in next_batch for sub in element] + + self._logger.log( + f"Starting {len(next_batch)} parallel jobs under Icolos JobControl, execution batch {n}", + _LE.INFO, + ) + + steps = self._prepare_batch(next_batch) + + result = parallelizer.execute_parallel(steps=steps) + + # successful execution of each step is not explicitly checked; + # the step is responsible for throwing errors if something has gone wrong + for task in next_batch: + for subtask in task: + subtask.set_status_success() + # keep the batch counter in sync for logging + n += 1 + + def _run_step(self, steps: List[StepBase]): + # submits and monitors each step in the sublist + for step in steps: # length max_len_sublist + # at this point the internal steps don't have their data initialised + step.generate_input() + step.execute() + step.process_write_out() diff --git a/icolos/core/step_utils/__init__.py b/icolos/core/step_utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/step_utils/input_merger.py b/icolos/core/step_utils/input_merger.py new file mode 100644 index 0000000..ecc3566 --- /dev/null +++ b/icolos/core/step_utils/input_merger.py @@ -0,0 +1,114 @@ +from copy import deepcopy +from typing import List, Dict +from pydantic import BaseModel + +from icolos.core.containers.compound import Enumeration, Compound +from icolos.utils.enums.step_enums import StepBaseEnum + +_SBE = StepBaseEnum + + +class StepMerge(BaseModel): + compounds: bool = True + enumerations: bool = False + merge_compounds_by: str = _SBE.INPUT_MERGE_BY_NAME + merge_enumerations_by: str = _SBE.INPUT_MERGE_BY_ID + + +class InputMerger: + def __init__(self, config: StepMerge): + self.config = config + + def _group_enumerations( + self, enumerations: List[Enumeration], by + ) -> Dict[str, 
List[Enumeration]]: + if by == _SBE.INPUT_MERGE_BY_SMILE: + grouped = {enumeration.get_smile(): [] for enumeration in enumerations} + for enum in enumerations: + grouped[enum.get_smile()].append(enum) + elif by == _SBE.INPUT_MERGE_BY_ID: + grouped = { + str(enumeration.get_enumeration_id()): [] + for enumeration in enumerations + } + for enum in enumerations: + grouped[str(enum.get_enumeration_id())].append(enum) + else: + raise NotImplementedError + return grouped + + def _group_compounds( + self, compounds: List[Compound], by + ) -> Dict[str, List[Compound]]: + if by == _SBE.INPUT_MERGE_BY_NAME: + names = {compound.get_name(): [] for compound in compounds} + for compound in compounds: + names[compound.get_name()].append(compound) + elif by == _SBE.INPUT_MERGE_BY_ID: + names = {str(compound.get_compound_number()): [] for compound in compounds} + for compound in compounds: + names[str(compound.get_compound_number())].append(compound) + else: + raise NotImplementedError + return names + + def _merge_enumerations( + self, enumerations: List[Enumeration], by + ) -> List[Enumeration]: + list_result = [] + + # note that if it has been grouped by ID, the first (arbitrary) smile is used + for _, enum_list in self._group_enumerations(enumerations, by).items(): + enum_combined = deepcopy(enum_list[0]) + enum_combined.clear_conformers() + for enum in enum_list: + enum_combined.add_conformers( + deepcopy(enum.get_conformers()), auto_update=False + ) + list_result.append(enum_combined) + return list_result + + def unroll_compounds(self, compounds: list) -> List[Compound]: + list_buffer = [] + for ele in compounds: + if isinstance(ele, list): + list_buffer = list_buffer + self.unroll_compounds(ele) + elif isinstance(ele, Compound): + list_buffer.append(ele) + return list_buffer + + def merge(self, compounds: List[Compound]) -> List[Compound]: + list_result = [] + + # if selected, combine compounds into one depending on the strategy + if self.config.compounds: + dict_grouped = self._group_compounds( + compounds, self.config.merge_compounds_by + ) + number = 0 + for name, compound_list in dict_grouped.items(): + # add the enumerations of all compounds together but do NOT auto-update yet (because enumerations might + # also be merged later on) + comp_combined = Compound(name=name, compound_number=number) + for comp in compound_list: + comp_combined.add_enumerations( + deepcopy(comp.get_enumerations()), auto_update=False + ) + + # as merging of enumerations only makes sense when there was a compound merge, keep + # it on that indentation level + if self.config.enumerations: + enumerations = self._merge_enumerations( + deepcopy(comp_combined.get_enumerations()), + self.config.merge_enumerations_by, + ) + comp_combined.clear_enumerations() + comp_combined.add_enumerations(enumerations, auto_update=False) + + # now, rename the enumerations and conformers + comp_combined.reset_all_ids() + comp_combined.update_all_relations() + + list_result.append(comp_combined) + number += 1 + return list_result diff --git a/icolos/core/step_utils/input_preparator.py b/icolos/core/step_utils/input_preparator.py new file mode 100644 index 0000000..7c4da81 --- /dev/null +++ b/icolos/core/step_utils/input_preparator.py @@ -0,0 +1,535 @@ +from icolos.core.containers.generic import GenericContainer, GenericData +import json +import pandas as pd +from rdkit import Chem + +from icolos.loggers.base_logger import BaseLogger +from icolos.utils.enums.input_enums import InputEnum +from icolos.utils.enums.logging_enums import 
LoggingConfigEnum +from icolos.utils.enums.write_out_enums import WriteOutEnum +from icolos.utils.general.icolos_exceptions import StepFailed +from icolos.utils.smiles import to_smiles +from icolos.utils.general.files_paths import infer_input_type + +from typing import List, Any +from pydantic import BaseModel + +from icolos.core.step_utils.input_merger import InputMerger, StepMerge +from icolos.core.containers.compound import Enumeration, Compound, Conformer +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +import os +from tempfile import mkdtemp +import requests + +_SBE = StepBaseEnum +_LE = LoggingConfigEnum() +_WE = WriteOutEnum() +_SGE = StepGromacsEnum() +_IE = InputEnum() + + +class StringPath(str): + def __new__(cls, content): + return super().__new__(cls, content) + + +class StringFile(str): + def __new__(cls, content): + return super().__new__(cls, content) + + +class StepData(BaseModel): + class Config: + arbitrary_types_allowed = True + + compounds: List[Compound] = [] + generic: GenericContainer = GenericContainer() + + +class StepCSVInputColumnParameters(BaseModel): + smiles: str + names: str = None + + +class StepInputEnforceIDs(BaseModel): + compound_ids: List = None + enumeration_ids: List = None + + +class StepInputSource(BaseModel): + source: str + source_type: str = None + source_field: str = _IE.SOURCE_FIELD_COMPOUNDS + target_field: str = _IE.SOURCE_FIELD_COMPOUNDS + extension: str = None + format: str = None + delimiter: str = "," + columns: StepCSVInputColumnParameters = None + enforce_ids: StepInputEnforceIDs = None + + +class StepInputParameters(BaseModel): + compounds: List[StepInputSource] = [] + generic: List[StepInputSource] = [] + perturbation_map: List[StepInputSource] = None + merge: StepMerge = StepMerge() + work_dir: str = None + + +class InputPreparator(BaseModel): + workflow: Any = None + logger: BaseLogger = None + + class Config: + underscore_attrs_are_private = True + arbitrary_types_allowed = True + + def __init__(self, **data): + super().__init__(**data) + + def generate_input(self, step_input: StepInputParameters, step_type): + compounds = self._generate_compound_input(step_input) + generic = ( + self._generate_generic_input(step_input, step_type) + if step_input.generic + else GenericContainer() + ) + # Instruct the step to run in a specific workdir, e.g. 
from a previously failed job or to execute a few related steps in the same dir + if step_input.work_dir is not None: + if os.path.isdir(step_input.work_dir): + work_dir = step_input.work_dir + self.logger.log( + f"Found specified work dir at {step_input.work_dir}", _LE.DEBUG + ) + # now check whether this needs attaching to the workflow for the rest of the steps + if self.workflow.header.global_settings.single_directory: + self.workflow.workflow_data.work_dir = work_dir + self.logger.log( + f"Setting workdir at {step_input.work_dir} to the workflow's workdir", + _LE.DEBUG, + ) + else: + # last resort, if a previous step_id has been passed, get the work_dir from there + + work_dir = self.workflow.find_step_by_step_id( + step_input.work_dir + ).work_dir + elif ( + self.workflow is not None + and self.workflow.header.global_settings.single_directory + ): + # Entire workflow running in a single dir (pmx), either generate one for the first + # step or use the already generated dir + work_dir = self._get_workflow_workdir() + else: + work_dir = None + return ( + StepData(compounds=compounds, generic=generic), + work_dir, + ) + + def _get_workflow_workdir(self): + # check whether the workflow already has one attached, otherwise create one + if self.workflow.workflow_data.work_dir is not None and os.path.isdir( + self.workflow.workflow_data.work_dir + ): + return self.workflow.workflow_data.work_dir + else: + tmp_dir = mkdtemp() + self.workflow.workflow_data.work_dir = tmp_dir + self.logger.log(f"Set workflow's tmpdir to {tmp_dir}", _LE.DEBUG) + return tmp_dir + + def _generate_compound_input(self, step_input: StepInputParameters) -> List: + compounds = [] + for inp in step_input.compounds: + if inp.target_field == _IE.TARGET_FIELD_COMPOUNDS: + buffer = [] + if inp.source_type == _SBE.INPUT_SOURCE_TYPE_FILE: + buffer.append(self._read_compound_input_from_file(inp)) + elif inp.source_type == _SBE.INPUT_SOURCE_TYPE_STEP: + buffer.append(self._read_compound_input_from_step(inp)) + elif inp.source_type == _SBE.INPUT_SOURCE_TYPE_STRING: + buffer.append(self._read_input_from_string(inp)) + else: + raise ValueError( + f"Source type {inp.source_type} for compound input unsupported - abort." + ) + if inp.target_field == _IE.SOURCE_FIELD_COMPOUNDS: + # note: no unrolling here! + compounds = compounds + buffer + + elif inp.target_field == _IE.TARGET_FIELD_CONFORMERS: + if inp.source_type == _SBE.INPUT_SOURCE_TYPE_FILE: + compounds = compounds + self._read_conformers_input_from_file(inp) + if len(compounds) > 0: + compounds = self._apply_compound_merger(step_input, compounds) + return compounds + + def _generate_generic_input( + self, step_input: StepInputParameters, step_type + ) -> GenericContainer: + generic = GenericContainer() + for inp in step_input.generic: + files = self._read_data_to_generic(inp) + generic.add_files(files) + return generic + + def _read_data_to_generic(self, inp: StepInputSource): + ext = inp.extension + if inp.source_type == _SBE.INPUT_SOURCE_TYPE_FILE or os.path.isfile(inp.source): + assert os.path.isfile(inp.source) + try: + with open(inp.source, "r") as f: + data = f.read() + except UnicodeDecodeError: + with open(inp.source, "rb") as f: + data = f.read() + file = GenericData(inp.source.split("/")[-1], data) + return [file] + elif inp.source_type == _SBE.INPUT_SOURCE_TYPE_URL or inp.source.startswith( + "http" + ): + data = self._get_pdb_file_from_api(inp.source) + file_name = inp.source.split("/")[-1].split(".")[0] + "." 
+ inp.extension + file = GenericData(file_name=file_name, file_data=data) + return [file] + elif inp.source_type == _SBE.INPUT_SOURCE_TYPE_DIR or os.path.isdir(inp.source): + assert os.path.isdir(inp.source) + file = GenericData( + file_data=inp.source, + file_name=inp.source.split("/")[-1], + extension=inp.extension, + ) + return [file] + else: + # fall back on step source type + input_step = self.workflow.find_step_by_step_id(inp.source) + files = input_step.data.generic.get_files_by_extension(ext) + + # special case for itp and ndx files, these are included in the topol file so are never arguments + if ext in ["itp", "ndx"]: + return files + + if len(files) == 1: + file = files[0] + # file.set_argument() + return [file] + # else use the argument method + else: + # this introduces a manual check on which file the user wants if there are multiple + file = input_step.data.generic.get_argument_by_extension( + ext, rtn_file_object=True + ) + return [file] + + def _get_pdb_file_from_api(self, pdb_url: str): + response = self._get_request(pdb_url) + if response is None or not response.ok: + return None + return response.text + + def _get_request(self, url, max_tries=5): + trials = 0 + while trials < max_tries: + response = requests.get(url) + if response.status_code == 200: + return response + trials += 1 + # no successful response within max_tries attempts + return None + + def _apply_compound_merger( + self, step_input: StepInputParameters, compounds: List[Compound] + ) -> List[Compound]: + merger = InputMerger(step_input.merge) + compounds = merger.unroll_compounds(compounds) + if not any( + [ + True + for compound in step_input.compounds + if compound.enforce_ids is not None + ] + ): + compounds = merger.merge(compounds=compounds) + + if len(compounds) == 0 and self.logger is not None: + self.logger.log( + "Input list of compounds is empty, this is likely an error.", + _LE.WARNING, + ) + return compounds + + def _read_compound_input_from_step(self, inp: StepInputSource): + input_step = self.workflow.find_step_by_step_id(inp.source) + return input_step.clone_compounds() + + def _read_conformers_input_from_file(self, inp: StepInputSource): + # set up path to input file and extract the input format + input_format = inp.format + if input_format is None and self.logger is not None: + self.logger.log( + "No input format specified, will try to infer type (not recommended).", + _LE.WARNING, + ) + input_format = infer_input_type(inp.source) + input_format = input_format.upper() + + # call the respective loading function + if input_format == _SBE.FORMAT_SDF: + compound = Compound(compound_number=0) + enumeration = Enumeration() + for mol_id, mol in enumerate( + Chem.SDMolSupplier(inp.source, removeHs=False) + ): + conformer = Conformer(conformer=mol, enumeration_object=enumeration) + enumeration.add_conformer(conformer=conformer, auto_update=True) + compound.add_enumeration(enumeration, auto_update=True) + return [compound] + else: + raise ValueError( + f"At the moment, input format {input_format} is not supported." 
+ ) + + def _read_compound_input_from_file(self, inp: StepInputSource): + # set up path to input file and extract the input format + input_format = inp.format + if input_format is None and self.logger is not None: + self.logger.log( + "No input format specified, will try to infer type (not recommended).", + _LE.WARNING, + ) + input_format = infer_input_type(inp.source) + input_format = input_format.upper() + + # call the respective loading function + if input_format == _SBE.FORMAT_SDF: + result = self._read_in_SDF_file(inp) + elif input_format == _SBE.FORMAT_CSV: + result = self._read_in_CSV_file(inp) + elif input_format == _SBE.FORMAT_SMI: + result = self._read_in_SMI_file(inp) + elif input_format == _SBE.FORMAT_JSON: + result = self._read_in_JSON_file(inp) + else: + raise ValueError( + f"At the moment, input format {input_format} is not supported." + ) + + # apply ID enforcement, if specified + return self._enforce_ids(result, inp) + + def _read_input_from_string(self, inp: StepInputSource) -> List[Compound]: + # the strings must be separated by a semi-colon (';'); they may have names in front separated by a colon (':') + elements = inp.source.split(";") + list_compounds = [] + for line_id, line in enumerate(elements): + # it could be that names are part of the elements; otherwise use the number + # remove trailing or preceding white spaces + parts = [x.strip() for x in line.split(":")] + if len(parts) == 2: + compound = Compound(name=parts[0], compound_number=line_id) + smile = parts[1] + else: + compound = Compound(name=str(line_id), compound_number=line_id) + smile = parts[0] + enumeration = Enumeration(smile=smile, original_smile=smile) + compound.add_enumeration(enumeration, auto_update=True) + list_compounds.append(compound) + + # apply ID enforcement, if specified + return self._enforce_ids(list_compounds, inp) + + def _read_in_SDF_file(self, inp: StepInputSource) -> List[Compound]: + def _get_existing_enumeration(comp_id, enum_id): + comp = _get_existing_compound(comp_id) + for enum in comp.get_enumerations(): + if enum.get_enumeration_id() == int(enum_id): + return enum + raise ValueError + + def _get_existing_compound(idx): + for comp in list_compounds: + if int(idx) == comp.get_compound_number(): + return comp + raise ValueError + + list_compounds = [] + compound_number = 0 + icolos_naming = True + # Parses compounds following the Icolos naming convention of Compound:Enumeration:Conformer to reconstruct the compound object + for mol in Chem.SDMolSupplier(inp.source, removeHs=False): + new_compound = False + new_enumeration = False + mol_name = mol.GetProp(_WE.RDKIT_NAME) + # assuming the mol name follows Icolos conventions + try: + id_parts = mol_name.split(":") + comp_id = id_parts[0] + enum_id = id_parts[1] + + except IndexError: + # the name does not follow the Icolos convention + icolos_naming = False + comp_id = mol_name + enum_id = 0 + + if icolos_naming: + # reconstruct compound objects + try: + # try to find an existing compound with the correct name + compound = _get_existing_compound(idx=comp_id) + except ValueError: + # the compound does not yet exist, create the object + new_compound = True + try: + # if we have standard icolos compound naming + comp_num = int(comp_id) + except ValueError: + # some other naming scheme + comp_num = compound_number + compound = Compound(name=comp_id, compound_number=comp_num) + try: + # check whether the enumeration exists + enumeration = _get_existing_enumeration(comp_id, enum_id) + except ValueError: + new_enumeration = True + enumeration = Enumeration( + smile=to_smiles(mol), 
molecule=mol, + original_smile=to_smiles(mol), + ) + + if len(id_parts) == 3: + # i.e. 0:0:0, we have a conformer + conf = Conformer( + conformer=mol, + enumeration_object=enumeration, + conformer_id=int(id_parts[2]), + ) + enumeration.add_conformer(conf, auto_update=True) + if new_enumeration: + compound.add_enumeration(enumeration, auto_update=True) + if new_compound: + list_compounds.append(compound) + + else: + # if non-standard naming conventions, simply load each mol into a new compound object, with single enum/conf + compound = Compound(name=comp_id, compound_number=compound_number) + enum = Enumeration( + smile=to_smiles(mol), + molecule=mol, + original_smile=to_smiles(mol), + enumeration_id=0, + ) + enum.add_conformer( + Conformer(conformer=mol, enumeration_object=enum, conformer_id=0), + auto_update=True, + ) + compound.add_enumeration(enumeration=enum) + list_compounds.append(compound) + + compound_number += 1 + return list_compounds + + def _read_in_SMI_file(self, inp: StepInputSource) -> List[Compound]: + list_compounds = [] + with open(inp.source, "r") as f: + # while the SMI file definition requires a name (separated by blanks) on each line + # as well, assume that it might not be present + lines = [line.rstrip() for line in f.readlines()] + for line_id, line in enumerate(lines): + if line == "": + continue + + parts = line.split() + if len(parts) == 2: + compound = Compound(name=parts[1], compound_number=line_id) + else: + compound = Compound(name=str(line_id), compound_number=line_id) + enumeration = Enumeration(smile=parts[0], original_smile=parts[0]) + compound.add_enumeration(enumeration, auto_update=True) + list_compounds.append(compound) + return list_compounds + + def _read_in_JSON_file(self, inp: StepInputSource) -> List[Compound]: + list_compounds = [] + + # load input + with open(inp.source, "r") as f: + inp_json = f.read().replace("\r", "").replace("\n", "") + inp_dict = json.loads(inp_json) + + comp_id = 0 + for name, smile in zip(inp_dict[_IE.JSON_NAMES], inp_dict[_IE.JSON_SMILES]): + compound = Compound(name=name, compound_number=comp_id) + enumeration = Enumeration(smile=smile, original_smile=smile) + compound.add_enumeration(enumeration, auto_update=True) + list_compounds.append(compound) + comp_id += 1 + + return list_compounds + + def _read_in_CSV_file(self, inp: StepInputSource) -> List[Compound]: + list_compounds = [] + delimiter = inp.delimiter + data = pd.read_csv(inp.source, delimiter=delimiter) + + smiles_column = inp.columns.smiles + if smiles_column not in list(data.columns): + raise StepFailed( + f"Column name for the smiles either not set or not found in input CSV." + ) + + # deal with names (if specified) + names_column = inp.columns.names + if names_column is None: + names_compounds = None + else: + if names_column not in list(data.columns): + raise StepFailed( + f"Specified column name ({names_column}) for the names not found in input CSV." 
+ ) + else: + names_compounds = [ + str(name).strip() for name in data[names_column].tolist() + ] + + # build the compounds + smiles = [str(line).strip() for line in data[smiles_column].tolist()] + for number in range(len(smiles)): + if names_compounds is not None: + compound = Compound( + name=names_compounds[number], compound_number=number + ) + else: + compound = Compound(name=str(number), compound_number=number) + enumeration = Enumeration( + smile=smiles[number], original_smile=smiles[number] + ) + compound.add_enumeration(enumeration, auto_update=True) + list_compounds.append(compound) + return list_compounds + + def _enforce_ids( + self, compounds: List[Compound], inp: StepInputSource + ) -> List[Compound]: + if inp.enforce_ids is not None: + if inp.enforce_ids.compound_ids is not None: + for comp_idx, comp in enumerate(compounds): + comp.set_compound_number( + int(inp.enforce_ids.compound_ids[comp_idx]) + ) + + # set enumeration ids + enum_id_idx = 0 + if inp.enforce_ids.enumeration_ids is not None: + for comp in compounds: + for enum in comp.get_enumerations(): + enum.set_enumeration_id( + int(inp.enforce_ids.enumeration_ids[enum_id_idx]) + ) + enum_id_idx += 1 + if self.logger is not None: + self.logger.log( + "Enforced IDs for compounds and enumerations specified (merging disabled).", + _LE.DEBUG, + ) + return compounds diff --git a/icolos/core/step_utils/rdkit_utils.py b/icolos/core/step_utils/rdkit_utils.py new file mode 100644 index 0000000..1a93267 --- /dev/null +++ b/icolos/core/step_utils/rdkit_utils.py @@ -0,0 +1,10 @@ +from rdkit import Chem + + +def to_smiles(mol, isomericSmiles=False): + """ + Converts a Mol object into a canonical SMILES string. + :param mol: Mol object. + :return: A SMILES string. + """ + return Chem.MolToSmiles(mol, isomericSmiles=isomericSmiles) diff --git a/icolos/core/step_utils/retry.py b/icolos/core/step_utils/retry.py new file mode 100644 index 0000000..e4db30a --- /dev/null +++ b/icolos/core/step_utils/retry.py @@ -0,0 +1,42 @@ +import functools +import time +from typing import Any +from pydantic import BaseModel + + +class RetryResult(BaseModel): + success: bool + tries: int + result: Any = None + exception: Exception = None + + class Config: + arbitrary_types_allowed = True + + def __init__(self, **data): + super().__init__(**data) + + +# TODO: do a unit test for this +def retry(n_tries, retry_wait_seconds, allowed_exceptions=()): + if n_tries < 1: + n_tries = 1 + + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs) -> RetryResult: + exc = None + for idx in range(n_tries): + try: + result = func(*args, **kwargs) + return RetryResult( + success=True, tries=idx + 1, result=result, exception=None + ) + except allowed_exceptions as e: + exc = e + time.sleep(retry_wait_seconds) + return RetryResult(success=False, tries=n_tries, result=None, exception=exc) + + return wrapper + + return decorator diff --git a/icolos/core/step_utils/run_variables_resolver.py b/icolos/core/step_utils/run_variables_resolver.py new file mode 100644 index 0000000..3305f6a --- /dev/null +++ b/icolos/core/step_utils/run_variables_resolver.py @@ -0,0 +1,67 @@ +from copy import deepcopy + +from icolos.core.containers.compound import Enumeration, Compound, Conformer +from icolos.utils.enums.write_out_enums import RunVariablesEnum + +_RVE = RunVariablesEnum() + + +class RunVariablesResolver: + def __init__(self): + pass + + def _replace(self, input_str: str, pattern: str, replacement) -> str: + if replacement is not None: + pattern = _RVE.PREFIX + 
pattern + _RVE.POSTFIX + input_str = input_str.replace(pattern, str(replacement)) + return input_str + + def resolve_compound_level(self, input_str: str, comp: Compound) -> str: + comp = deepcopy(comp) + resolved_str = self._replace( + input_str, _RVE.COMPOUND_ID, comp.get_compound_number() + ) + resolved_str = self._replace(resolved_str, _RVE.COMPOUND_NAME, comp.get_name()) + return resolved_str + + def resolve_enumeration_level(self, input_str: str, enum: Enumeration) -> str: + enum = deepcopy(enum) + resolved_str = self._replace( + input_str, _RVE.ENUMERATION_ID, enum.get_enumeration_id() + ) + resolved_str = self._replace( + resolved_str, _RVE.ENUMERATION_STRING, enum.get_index_string() + ) + return resolved_str + + def resolve_conformer_level(self, input_str: str, conf: Conformer) -> str: + conf = deepcopy(conf) + resolved_str = self._replace( + input_str, _RVE.CONFORMER_ID, conf.get_conformer_id() + ) + resolved_str = self._replace( + resolved_str, _RVE.CONFORMER_STRING, conf.get_index_string() + ) + return resolved_str + + def resolve(self, input_str: str, input_object) -> str: + if not isinstance(input_str, str): + return input_str + + if isinstance(input_object, Conformer): + input_str = self.resolve_compound_level( + input_str, input_object.get_enumeration_object().get_compound_object() + ) + input_str = self.resolve_enumeration_level( + input_str, input_object.get_enumeration_object() + ) + return self.resolve_conformer_level(input_str, input_object) + elif isinstance(input_object, Enumeration): + input_str = self.resolve_compound_level( + input_str, input_object.get_compound_object() + ) + return self.resolve_enumeration_level(input_str, input_object) + elif isinstance(input_object, Compound): + return self.resolve_compound_level(input_str, input_object) + else: + raise ValueError(f'Object of type "{type(input_object)}" is not supported.') diff --git a/icolos/core/step_utils/sdconvert_util.py b/icolos/core/step_utils/sdconvert_util.py new file mode 100644 index 0000000..fde466d --- /dev/null +++ b/icolos/core/step_utils/sdconvert_util.py @@ -0,0 +1,68 @@ +from icolos.loggers.steplogger import StepLogger +from icolos.utils.execute_external.sdconvert import SDConvertExecutor + +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum +from icolos.utils.enums.logging_enums import LoggingConfigEnum +from icolos.utils.general.icolos_exceptions import StepFailed + +_SEE = SchrodingerExecutablesEnum() +_LE = LoggingConfigEnum() + + +class SDConvertUtil: + def __init__(self, prefix_execution: str = None, binary_location: str = None): + self._logger = StepLogger() + + # initialize and check executor + self.executor = SDConvertExecutor( + prefix_execution=prefix_execution, binary_location=binary_location + ) + if not self.executor.is_available(): + raise StepFailed("Cannot initialize sdconvert backend - abort.") + self._logger.log(f"Checked sdconvert availability - valid.", _LE.DEBUG) + + def execute(self, arguments: list): + execution_result = self.executor.execute( + command=_SEE.SDCONVERT, arguments=arguments, check=True + ) + if execution_result.returncode != 0: + self._logger.log( + f"Could not execute sdconvert (returncode != 0) with error: {execution_result.stderr}.", + _LE.ERROR, + ) + + def mae2sdf(self, mae_file: str, sdf_file: str): + arguments = [ + "".join([_SEE.SDCONVERT_I, _SEE.SDCONVERT_FORMAT_MAE]), + mae_file, + "".join([_SEE.SDCONVERT_O, _SEE.SDCONVERT_FORMAT_SD]), + sdf_file, + ] + self.execute(arguments=arguments) + + def sdf2mae(self, sdf_file: 
str, mae_file: str): + arguments = [ + "".join([_SEE.SDCONVERT_I, _SEE.SDCONVERT_FORMAT_SD]), + sdf_file, + "".join([_SEE.SDCONVERT_O, _SEE.SDCONVERT_FORMAT_MAE]), + mae_file, + ] + self.execute(arguments=arguments) + + def pdb2mae(self, pdb_file: str, mae_file: str): + arguments = [ + "".join([_SEE.SDCONVERT_I, _SEE.SDCONVERT_FORMAT_PDB]), + pdb_file, + "".join([_SEE.SDCONVERT_O, _SEE.SDCONVERT_FORMAT_MAE]), + mae_file, + ] + self.execute(arguments=arguments) + + def sdf2pdb(self, sdf_file: str, pdb_file: str): + arguments = [ + "".join([_SEE.SDCONVERT_I, _SEE.SDCONVERT_FORMAT_SD]), + sdf_file, + "".join([_SEE.SDCONVERT_O, _SEE.SDCONVERT_FORMAT_PDB]), + pdb_file, + ] + self.execute(arguments=arguments) diff --git a/icolos/core/step_utils/step_writeout.py b/icolos/core/step_utils/step_writeout.py new file mode 100644 index 0000000..5bd4eac --- /dev/null +++ b/icolos/core/step_utils/step_writeout.py @@ -0,0 +1,507 @@ +import os +from collections import OrderedDict +from copy import deepcopy + +import numpy as np +import pandas as pd +import json +from typing import List +from pydantic import BaseModel, PrivateAttr +from rdkit import Chem +from pathlib import Path + +from icolos.core.containers.compound import Compound, Conformer +from icolos.core.step_utils.input_preparator import StepData +from icolos.core.step_utils.run_variables_resolver import RunVariablesResolver +from icolos.loggers.steplogger import StepLogger +from icolos.utils.enums.logging_enums import LoggingConfigEnum +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.enums.write_out_enums import WriteOutEnum + +_WE = WriteOutEnum() +_LE = LoggingConfigEnum() +_SBE = StepBaseEnum + + +class StepWriteoutCompoundAggregationParameters(BaseModel): + mode: _SBE = _SBE.WRITEOUT_COMP_AGGREGATION_MODE_ALL + # Union[ + # _SBE.WRITEOUT_COMP_AGGREGATION_MODE_ALL, + # _SBE.WRITEOUT_COMP_AGGREGATION_MODE_BESTPERCOMPOUND, + # _SBE.WRITEOUT_COMP_AGGREGATION_MODE_BESTPERENUMERATION, + # ] = _SBE.WRITEOUT_COMP_AGGREGATION_MODE_ALL + highest_is_best: bool = True + key: str = None + + +class StepWriteoutCompoundParameters(BaseModel): + category: _SBE + # Union[ + # _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS, + # _SBE.WRITEOUT_COMP_CATEGORY_ENUMERATIONS, + # _SBE.WRITEOUT_COMP_CATEGORY_EXTRADATA, + # ] + aggregation: StepWriteoutCompoundAggregationParameters = ( + StepWriteoutCompoundAggregationParameters() + ) + key: str = None + selected_tags: List[str] = None + + +class StepWriteoutGenericParameters(BaseModel): + key: str + + +class StepWriteoutDestinationParameters(BaseModel): + resource: str = None + type: _SBE = _SBE.WRITEOUT_DESTINATION_TYPE_FILE + # Union[ + # _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + # _SBE.WRITEOUT_DESTINATION_TYPE_REINVENT, + # _SBE.WRITEOUT_DESTINATION_TYPE_STDERR, + # _SBE.WRITEOUT_DESTINATION_TYPE_STDOUT, + # _SBE.WRITEOUT_DESTINATION_TYPE_REST, + # ] + # Union[_SBE.FORMAT_SDF, _SBE.FORMAT_CSV, _SBE.FORMAT_TXT] + format: _SBE = _SBE.FORMAT_TXT + merge: bool = True + mode: _SBE = _SBE.WRITEOUT_DESTINATION_BASE_NAME + # Union[ + # _SBE.WRITEOUT_DESTINATION_AUTOMATIC, + # _SBE.WRITEOUT_DESTINATION_BASE_NAME, + # _SBE.WRITEOUT_DESTINATION_DIR, + # ] = _SBE.WRITEOUT_DESTINATION_BASE_NAME + + +class StepWriteoutParameters(BaseModel): + compounds: StepWriteoutCompoundParameters = None + generic: StepWriteoutGenericParameters = None + destination: StepWriteoutDestinationParameters = None + + +class WriteOutHandler(BaseModel): + + config: StepWriteoutParameters + data: StepData = None + + class Config: 
underscore_attrs_are_private = True + + _logger = PrivateAttr() + + def __init__(self, **data): + super().__init__(**data) + self._logger = StepLogger() + + def set_data(self, data: StepData): + self.data = deepcopy(data) + + def get_data(self) -> StepData: + return self.data + + def _handle_destination_type(self): + if self.config.destination.type.lower() in ( + _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + _SBE.WRITEOUT_DESTINATION_TYPE_REINVENT, + ): + return self.config.destination.resource + elif ( + self.config.destination.type.lower() == _SBE.WRITEOUT_DESTINATION_TYPE_REST + ): + raise ValueError("REST end-point destination type not supported yet.") + raise ValueError( + f"Destination type {self.config.destination.type} not supported." + ) + + def _write_compounds(self): + resource = self._handle_destination_type() + resolver = RunVariablesResolver() + if self.config.compounds.category == _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS: + if self.config.destination.format.upper() == _SBE.FORMAT_CSV: + # only CSV is supported as the write-out format for tabular data + self._writeout_tabular() + elif self.config.destination.format.upper() == _SBE.FORMAT_JSON: + self._writeout_reinvent() + elif self.config.destination.format.upper() == _SBE.FORMAT_SDF: + + def _write_compounds(compounds: List[Compound], resource: str): + # TODO: deal with resolving resources differently (also for writing enumerations below) + resource_resolved = resource + for compound in compounds: + for enum in compound.get_enumerations(): + if len(enum.get_conformers()) > 0: + resource_resolved = resolver.resolve(resource, enum[0]) + break + self._make_folder(resource_resolved) + + writer = Chem.SDWriter(resource_resolved) + written = 0 + + for comp in compounds: + for enum in comp: + for conf in enum: + molecule = conf.get_molecule() + if ( + comp.get_name() is not None + and comp.get_name() != "" + ): + molecule.SetProp(_WE.COMPOUND_NAME, comp.get_name()) + molecule.SetProp( + _WE.RDKIT_NAME, conf.get_index_string() + ) + molecule.SetProp( + _WE.INDEX_STRING, conf.get_index_string() + ) + writer.write(molecule) + written += 1 + writer.close() + self._logger.log( + f"Wrote {written} conformers to file {resource_resolved}.", + _LE.DEBUG, + ) + + # TODO: At the moment, this only splits at the compound level (taking the first conformer for resolving), + # add full generic support. + if self.config.destination.merge: + _write_compounds(self.data.compounds, resource=resource) + else: + for comp in self.data.compounds: + _write_compounds([comp], resource) + elif self.config.compounds.category == _SBE.WRITEOUT_COMP_CATEGORY_ENUMERATIONS: + if not self.config.destination.format.upper() == _SBE.FORMAT_SDF: + raise NotImplementedError( + "This write-out is not supported for enumerations." 
+ ) + else: + + def _write_compounds(compounds: List[Compound], resource: str): + # TODO: deal with resolving resources differently (also for writing conformers above) + resource_resolved = resource + for compound in compounds: + if len(compound.get_enumerations()) > 0: + resource_resolved = resolver.resolve(resource, compound) + break + + self._make_folder(resource_resolved) + writer = Chem.SDWriter(resource_resolved) + written = 0 + for comp in compounds: + for enum in comp: + molecule = enum.get_molecule() + if comp.get_name() is not None and comp.get_name() != "": + molecule.SetProp(_WE.COMPOUND_NAME, comp.get_name()) + molecule.SetProp(_WE.RDKIT_NAME, enum.get_index_string()) + molecule.SetProp(_WE.INDEX_STRING, enum.get_index_string()) + writer.write(molecule) + written += 1 + writer.close() + self._logger.log( + f"Wrote {written} enumeration molecules to file {resource_resolved}.", + _LE.DEBUG, + ) + + if self.config.destination.merge: + _write_compounds(self.data.compounds, resource=resource) + else: + for comp in self.data.compounds: + _write_compounds([comp], resource) + elif self.config.compounds.category == _SBE.WRITEOUT_COMP_CATEGORY_EXTRADATA: + if self.config.destination.format.upper() != _SBE.FORMAT_TXT: + raise ValueError( + f"For writing out extra-data (attached to conformers), only TXT is supported as format." + ) + # TODO: Does merging make any sense here? + for comp in self.data.compounds: + for enum in comp: + for conf in enum: + resource_resolved = resolver.resolve(resource, conf) + self._make_folder(resource_resolved) + with open(resource_resolved, "w") as f: + content = conf.get_extra_data()[self.config.compounds.key] + if isinstance(content, list): + for line in content: + f.write(line.rstrip("\n") + "\n") + elif isinstance(content, str): + f.write(content) + else: + raise ValueError( + "Extra data must be either a string or a list of strings." + ) + else: + raise ValueError(f"{self.config.compounds.category} not supported.") + + def _write_generic_data(self): + if ( + self.config.destination.type.lower() != _SBE.WRITEOUT_DESTINATION_TYPE_FILE + or self.config.destination.format.upper() != _SBE.FORMAT_TXT + ): + raise ValueError( + 'When writing out generic data, you must use type "file" and format "txt".' + ) + # resource should be a directory for writeout only, in most cases it should already exist + resource = self._handle_destination_type() + self._make_folder(resource) + if self.config.destination.mode == _SBE.WRITEOUT_DESTINATION_DIR: + # The output path should be a directory only + if not os.path.isdir(resource): + raise AssertionError( + "When specifying a directory, the writeout destination resource must not be a filepath!" + ) + # write out all files from that step with the required extension + for idx, file in enumerate( + self.data.generic.get_files_by_extension(self.config.generic.key) + ): + if self.config.destination.mode == _SBE.WRITEOUT_DESTINATION_BASE_NAME: + parts = resource.split(".") + # use a local name so that the index suffixes do not accumulate on `resource` across iterations + file_resource = parts[0] + f"_{idx}." + parts[1] + file.write(file_resource, join=False) + elif self.config.destination.mode == _SBE.WRITEOUT_DESTINATION_AUTOMATIC: + parts = file.get_file_name().split(".") + file_name = parts[0] + f"_{idx}." 
+ parts[1] + resource = os.path.join("/".join(resource.split("/")[:-1]), file_name) + file.write(resource, join=False) + elif self.config.destination.mode == _SBE.WRITEOUT_DESTINATION_DIR: + resource = resource + assert os.path.isdir(resource) + file.write(resource, join=True, final_writeout=True) + + def write(self): + if self.config.compounds is not None and self.config.generic is not None: + raise ValueError("Only specify either compounds or generic data, not both.") + + if self.config.compounds is not None: + self._write_compounds() + elif self.config.generic is not None: + self._write_generic_data() + else: + raise ValueError("Either compounds or generic data has to be specified.") + + def _writeout_reinvent(self): + def _get_conf_by_comp_name(confs: List[Conformer], comp_name: str) -> Conformer: + # assumes there is at most 1 conformer / compound left at this stage, as is required by REINVENT + for conf in confs: + if conf.get_compound_name() == comp_name: + return conf + return None + + dict_result = {_WE.JSON_RESULTS: []} + tags = self._get_selected_tags() + + # add names, including those for which no conformer has been obtained + dict_result[_WE.JSON_NAMES] = [comp.get_name() for comp in self.data.compounds] + + # do aggregation (might remove conformers) + confs_unrolled = self._apply_aggregation(self.data.compounds) + + # add values (derived from molecule tags) + # TODO: if no conformers are left, we need to write out an empty JSON that tells REINVENT that none worked + for tag in tags: + values = [] + for comp_name in dict_result[_WE.JSON_NAMES]: + conf = _get_conf_by_comp_name(confs=confs_unrolled, comp_name=comp_name) + if conf is not None: + try: + value = conf.get_molecule().GetProp(tag) + except KeyError: + value = _WE.JSON_NA + else: + value = _WE.JSON_NA + values.append(value.strip()) + dict_result[_WE.JSON_RESULTS].append( + {_WE.JSON_VALUES_KEY: tag, _WE.JSON_VALUES: values} + ) + + # TODO: refactor that part + resource = self._handle_destination_type() + if len(confs_unrolled) > 0: + resolver = RunVariablesResolver() + resource_resolved = resolver.resolve(resource, confs_unrolled[0]) + else: + resource_resolved = resource + self._logger.log( + f"No conformers obtained, write-out resource resolving disabled.", + _LE.WARNING, + ) + self._make_folder(resource_resolved) + + # write-out according to destination type + # TODO: there seems to be an issue here, when multiple write-out blocks are specified and no conformers are + # left: only the first block gets executed and if that's not the REINVENT one, the run will crash + if self.config.destination.type.lower() in ( + _SBE.WRITEOUT_DESTINATION_TYPE_REINVENT, + _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + ): + with open(resource_resolved, "w") as f: + json.dump(dict_result, f, indent=4) + elif self.config.destination.type.lower() in ( + _SBE.WRITEOUT_DESTINATION_TYPE_STDOUT, + _SBE.WRITEOUT_DESTINATION_TYPE_STDERR, + ): + json.dump(dict_result, resource_resolved, indent=4) + else: + raise ValueError( + f"Destination type {self.config.destination.type} not supported for this function." + ) + + def _get_selected_tags(self) -> List[str]: + # this function returns a list of tags (strings) that are to be considered for e.g. 
tabular write-out + # if the respective configuration field is set to "None", use all tags (over all compounds in a batch) + if self.config.compounds.selected_tags is not None: + if isinstance(self.config.compounds.selected_tags, list): + list_tags = self.config.compounds.selected_tags + elif isinstance(self.config.compounds.selected_tags, str): + list_tags = [self.config.compounds.selected_tags] + else: + raise ValueError( + f'Tag selection "{self.config.compounds.selected_tags}" set to illegal value.' + ) + else: + # get all tags for all compounds + list_tags = [] + for comp in self.data.compounds: + for enum in comp: + for conf in enum: + list_tags = list_tags + list(conf.get_molecule().GetPropNames()) + + list_tags = list(set(list_tags)) + return list_tags + + def _initialize_dict_csv( + self, keys: List[str], nrow: int, fill_value=np.NaN + ) -> OrderedDict: + return_dict = OrderedDict() + for key in keys: + return_dict[key] = [fill_value for _ in range(nrow)] + return return_dict + + def _apply_aggregation(self, compounds: List[Compound]) -> List[Conformer]: + if ( + self.config.compounds.aggregation.mode + == _SBE.WRITEOUT_COMP_AGGREGATION_MODE_ALL + ): + return self._unroll_conformers(compounds) + + confs_remaining = [] + if ( + self.config.compounds.aggregation.mode + == _SBE.WRITEOUT_COMP_AGGREGATION_MODE_BESTPERENUMERATION + ): + raise NotImplementedError("Best per enumeration is not yet implemented.") + elif ( + self.config.compounds.aggregation.mode + == _SBE.WRITEOUT_COMP_AGGREGATION_MODE_BESTPERCOMPOUND + ): + for comp in compounds: + unrolled_conformers = self._unroll_conformers([comp]) + if len(unrolled_conformers) == 0: + continue + values = [ + float( + conf.get_molecule().GetProp( + self.config.compounds.aggregation.key + ) + ) + for conf in unrolled_conformers + ] + index_best = ( + values.index(max(values)) + if self.config.compounds.aggregation.highest_is_best + else values.index(min(values)) + ) + confs_remaining.append(unrolled_conformers[index_best]) + return confs_remaining + + def _unroll_conformers(self, compounds: List[Compound]) -> List[Conformer]: + result = [] + for comp in compounds: + for enum in comp: + for conf in enum: + result.append(conf) + return result + + def _writeout_tabular(self): + # get all tags of the molecules that are to be considered + tags = self._get_selected_tags() + + # remove the compound_name and _Name, as they will be specifically added at the beginning + if _WE.COMPOUND_NAME in tags: + tags.remove(_WE.COMPOUND_NAME) + if _WE.RDKIT_NAME in tags: + tags.remove(_WE.RDKIT_NAME) + + # do aggregation (might remove conformers) + confs_unrolled = self._apply_aggregation(self.data.compounds) + + # initialize a dictionary with all tags as keys and filled with NA for every position + dict_result = self._initialize_dict_csv( + keys=[_WE.RDKIT_NAME, _WE.COMPOUND_NAME] + tags, nrow=len(confs_unrolled) + ) + + # resolve resource + # TODO: refactor that part + resource = self._handle_destination_type() + resolver = RunVariablesResolver() + if len(confs_unrolled) == 0: + raise ValueError("No conformers found.") + resource_resolved = resolver.resolve(resource, confs_unrolled[0]) + self._make_folder(resource_resolved) + + # populate the dictionary with the values (if present) + for irow in range(len(confs_unrolled)): + # add the internal Icolos identifier + conf = confs_unrolled[irow] + dict_result[_WE.RDKIT_NAME][irow] = conf.get_index_string() + + # add the compound name, if specified + name = conf.get_compound_name() + 
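# compounds without a name fall back to an empty string in the name column +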
dict_result[_WE.COMPOUND_NAME][irow] = "" if name is None else name + for tag in tags: + try: + value = conf.get_molecule().GetProp(tag).strip() + except KeyError: + value = np.nan + dict_result[tag][irow] = value + + # do the writeout (after sanitation) + df_result = pd.DataFrame.from_dict(dict_result) + df_result = self._sanitize_df_columns(df=df_result) + df_result.to_csv( + path_or_buf=resource_resolved, + sep=",", + na_rep="", + header=True, + index=False, + mode="w", + quoting=None, + ) + self._logger.log( + f"Wrote data frame with {len(confs_unrolled)} rows and {len(tags)} columns to file {resource_resolved}.", + _LE.DEBUG, + ) + + def _sanitize_df_columns(self, df: pd.DataFrame) -> pd.DataFrame: + cols_before = df.columns.to_list() + df.columns = ( + df.columns.str.strip() + .str.replace(" ", "_") + .str.replace("(", "") + .str.replace(")", "") + .str.replace("/", "_") + .str.replace("[", "") + .str.replace("]", "") + ) + for col_before, col_after in zip(cols_before, df.columns.to_list()): + if col_before != col_after: + self._logger.log( + f"Sanitized column name {col_before} to {col_after}.", _LE.WARNING + ) + return df + + def _make_folder(self, path): + if isinstance(path, str): + if not os.path.isdir(path): + path = os.path.dirname(path) + Path(path).mkdir(parents=True, exist_ok=True) diff --git a/icolos/core/step_utils/structcat_util.py b/icolos/core/step_utils/structcat_util.py new file mode 100644 index 0000000..5668756 --- /dev/null +++ b/icolos/core/step_utils/structcat_util.py @@ -0,0 +1,68 @@ +from typing import List + +from icolos.loggers.steplogger import StepLogger +from icolos.utils.execute_external.openbabel import OpenBabelExecutor +from icolos.utils.execute_external.structcat import StructcatExecutor + +from icolos.utils.enums.program_parameters import ( + OpenBabelEnum, + SchrodingerExecutablesEnum, +) +from icolos.utils.enums.logging_enums import LoggingConfigEnum +from icolos.utils.general.icolos_exceptions import StepFailed + +_SEE = SchrodingerExecutablesEnum() +_LE = LoggingConfigEnum() + +_OE = OpenBabelEnum() + + +class StructcatUtil: + def __init__( + self, + prefix_execution: str = None, + binary_location: str = None, + backend: str = _SEE.STRUCTCAT, + ): + self._logger = StepLogger() + self._backend = backend + # initialize and check executor + if self._backend == _SEE.STRUCTCAT: + self.executor = StructcatExecutor( + prefix_execution=prefix_execution, binary_location=binary_location + ) + elif self._backend == _OE.OBABEL: + self.executor = OpenBabelExecutor() + + if not self.executor.is_available(): + raise StepFailed("Cannot initialize structcat backend - abort.") + + def concatenate( + self, + input_files: List[str], + output_file: str, + location: str = None, + backend=_SEE.STRUCTCAT, + ): + if self._backend == _SEE.STRUCTCAT: + arguments = [] + for input_file in input_files: + arguments = arguments + [ + _SEE.STRUCTCAT_I, + input_file, + ] + arguments = arguments + [ + _SEE.STRUCTCAT_O, + output_file, + ] + self.executor.execute( + command=_SEE.STRUCTCAT, arguments=arguments, check=True + ) + + elif self._backend == _OE.OBABEL: + arguments = input_files + arguments.append("-O") + arguments.append(output_file) + self.executor.execute( + command=_OE.OBABEL, arguments=arguments, check=True, location=location + ) diff --git a/icolos/core/step_utils/structconvert.py b/icolos/core/step_utils/structconvert.py new file mode 100644 index 0000000..b7f0798 --- /dev/null +++ b/icolos/core/step_utils/structconvert.py @@ -0,0 +1,69 @@ +from 
icolos.loggers.steplogger import StepLogger +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum +from icolos.utils.execute_external.schrodinger import SchrodingerExecutor +from icolos.utils.general.icolos_exceptions import StepFailed +from icolos.utils.enums.logging_enums import LoggingConfigEnum + +_LE = LoggingConfigEnum() +_SEE = SchrodingerExecutablesEnum() + + +class StructConvert: + """ + Utility for converting structure files with Schrodinger's StructConvert + """ + + def __init__(self, prefix_execution: str, binary_location: str = None) -> None: + self._logger = StepLogger() + + self.executor = SchrodingerExecutor( + binary_location=binary_location, prefix_execution=prefix_execution + ) + if not self.executor.is_available(): + raise StepFailed("Cannot initialize structconvert backend - abort.") + self._logger.log(f"Checked structconvert availability - valid.", _LE.DEBUG) + + def execute(self, arguments: list): + execution_result = self.executor.execute( + command=_SEE.STRUCTCONVERT, arguments=arguments, check=True + ) + if execution_result.returncode != 0: + self._logger.log( + f"Could not execute structconvert (returncode != 0) with error: {execution_result.stderr}.", + _LE.ERROR, + ) + + def convert(self, input_file: str, output_file: str): + arguments = [ + input_file, + output_file, + ] + self.execute(arguments=arguments) + + def pdb2mae(self, pdb_file: str, mae_file: str): + # newer Schrodinger releases do not check this, so it needs to be done here + assert pdb_file.endswith(".pdb") + assert mae_file.endswith(".mae") + arguments = [ + pdb_file, + mae_file, + ] + self.execute(arguments=arguments) + + def sdf2pdb(self, sdf_file: str, pdb_file: str): + assert sdf_file.endswith(".sdf") + assert pdb_file.endswith(".pdb") + arguments = [ + sdf_file, + pdb_file, + ] + self.execute(arguments=arguments) + + def mae2pdb(self, mae_file: str, pdb_file: str): + assert mae_file.endswith(".mae") + assert pdb_file.endswith(".pdb") + arguments = [ + mae_file, + pdb_file, + ] + self.execute(arguments=arguments) diff --git a/icolos/core/steps_utils.py b/icolos/core/steps_utils.py new file mode 100644 index 0000000..ace576b --- /dev/null +++ b/icolos/core/steps_utils.py @@ -0,0 +1,22 @@ +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.general.convenience_functions import nested_get +from icolos.utils.enums.step_initialization_enum import StepInitializationEnum +from icolos.utils.enums.flow_control_enums import FlowControlInitializationEnum + +_IE = StepInitializationEnum() +_FCE = FlowControlInitializationEnum() + + +def initialize_step_from_dict(step_conf: dict) -> StepBase: + _STE = StepBaseEnum + step_type = nested_get(step_conf, _STE.STEP_TYPE, default=None) + step_type = None if step_type is None else step_type.upper() + if step_type in _IE.STEP_INIT_DICT.keys(): + return _IE.STEP_INIT_DICT[step_type](**step_conf) + elif step_type in _FCE.FLOW_CONTROL_INIT_DICT.keys(): + return _FCE.FLOW_CONTROL_INIT_DICT[step_type](**step_conf) + else: + raise ValueError( + f"Backend for step {nested_get(step_conf, _STE.STEPID, '')} unknown." 
+ ) diff --git a/icolos/core/workflow_steps/__init__.py b/icolos/core/workflow_steps/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/autodockvina/__init__.py b/icolos/core/workflow_steps/autodockvina/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/autodockvina/docking.py b/icolos/core/workflow_steps/autodockvina/docking.py new file mode 100644 index 0000000..0d8df83 --- /dev/null +++ b/icolos/core/workflow_steps/autodockvina/docking.py @@ -0,0 +1,324 @@ +import os +import shutil +import tempfile +from typing import List, Tuple + +from pydantic import BaseModel, Field +from rdkit import Chem +from copy import deepcopy + +from icolos.utils.enums.step_enums import StepAutoDockVinaEnum, StepBaseEnum +from icolos.utils.execute_external.autodockvina import AutoDockVinaExecutor +from icolos.utils.execute_external.openbabel import OpenBabelExecutor +from icolos.utils.general.icolos_exceptions import StepFailed +from icolos.utils.general.files_paths import gen_tmp_file +from icolos.core.containers.compound import Conformer +from icolos.utils.enums.program_parameters import AutoDockVinaEnum, OpenBabelEnum +from icolos.core.workflow_steps.step import _LE, StepBase +from icolos.utils.general.parallelization import Subtask, SubtaskContainer, Parallelizer + +_SBE = StepBaseEnum +_ADE = AutoDockVinaEnum() +_OBE = OpenBabelEnum() +_SAE = StepAutoDockVinaEnum() + + +class ADVSearchSpace(BaseModel): + center_x: float = Field(alias="--center_x", default=None) + center_y: float = Field(alias="--center_y", default=None) + center_z: float = Field(alias="--center_z", default=None) + size_x: float = Field(alias="--size_x", default=15.0) + size_y: float = Field(alias="--size_y", default=15.0) + size_z: float = Field(alias="--size_z", default=15.0) + + +class ADVConfiguration(BaseModel): + seed: int = 42 + number_poses: int = 1 + search_space: ADVSearchSpace = ADVSearchSpace() + receptor_path: str = None + + +class ADVAdditional(BaseModel): + configuration: ADVConfiguration = ADVConfiguration() + grid_ids: List[str] = ["grid0"] + + +class StepAutoDockVina(StepBase, BaseModel): + + _openbabel_executor: OpenBabelExecutor = None + adv_additional: ADVAdditional = None + + def __init__(self, **data): + super().__init__(**data) + + # initialize the executor and test availability + self._initialize_backend(executor=AutoDockVinaExecutor) + self._check_backend_availability() + + # initialize the executor for all "OpenBabel" + self._openbabel_executor = OpenBabelExecutor() + if not self._openbabel_executor.is_available(): + raise StepFailed( + "AutoDock Vina requires OpenBabel execution, initialization failed." 
+ ) + + # set ADV specific settings and ensure that each molecule gets its own sublist + self.adv_additional = ADVAdditional(**self.settings.additional) + self.execution.parallelization.max_length_sublists = 1 + + def _set_docking_score(self, conformer: Chem.Mol) -> bool: + try: + result_tag_lines = conformer.GetProp(_ADE.REMARK_TAG).split("\n") + result_line = [ + line for line in result_tag_lines if _ADE.RESULT_LINE_IDENTIFIER in line + ][0] + parts = result_line.split() + docking_score = parts[_ADE.RESULT_LINE_POS_SCORE] + except KeyError: + return False + conformer.SetProp(_SBE.ANNOTATION_TAG_DOCKING_SCORE, str(docking_score)) + return True + + def _write_molecule_to_pdbqt(self, path: str, molecule: Chem.Mol) -> bool: + # generate temporary copy as PDB + _, tmp_pdb = gen_tmp_file(suffix=".pdb", dir=os.path.dirname(path)) + Chem.MolToPDBFile(molecule, filename=tmp_pdb) + + # translate the pdb into a pdbqt including partial charges + # Note: In contrast to the target preparation, + # we will use a tree-based flexibility treatment here - + # thus, the option "-xr" is NOT used. + arguments = [ + tmp_pdb, + _OBE.OBABEL_OUTPUT_FORMAT_PDBQT, + "".join([_OBE.OBABEL_O, path]), + _OBE.OBABEL_PARTIALCHARGE, + _OBE.OBABEL_PARTIALCHARGE_GASTEIGER, + ] + self._openbabel_executor.execute( + command=_OBE.OBABEL, arguments=arguments, check=False + ) + + if os.path.exists(path): + return True + else: + return False + + def _generate_temporary_input_output_files( + self, batch: List[List[Subtask]] + ) -> Tuple[List[str], List[str], List[str], List[str]]: + tmp_output_dirs = [] + tmp_input_paths = [] + tmp_output_paths = [] + enumeration_ids = [] + + for next_subtask_list in batch: + # for "AutoDock Vina", only single molecules can be handled so every sublist is + # guaranteed at this stage to have only one element + if len(next_subtask_list) > 1: + self._logger.log( + f"Subtask list length for ADV is > 1 ({len(next_subtask_list)}), only the first element will be processed.", + _LE.WARNING, + ) + subtask = next_subtask_list[0] + + # generate temporary input files and output directory + cur_tmp_output_dir = tempfile.mkdtemp() + _, cur_tmp_input_pdbqt = gen_tmp_file( + suffix=".pdbqt", dir=cur_tmp_output_dir + ) + _, cur_tmp_output_sdf = gen_tmp_file(suffix=".sdf", dir=cur_tmp_output_dir) + + # try to write the enumeration molecules out as PDBQT files + enumeration = subtask.data + mol = deepcopy(enumeration.get_molecule()) + if mol is None: + shutil.rmtree(cur_tmp_output_dir) + self._logger.log( + f"Enumeration {enumeration.get_index_string()} did not hold a valid RDkit molecule - skipped.", + _LE.DEBUG, + ) + continue + if not self._write_molecule_to_pdbqt(cur_tmp_input_pdbqt, mol): + self._logger.log( + f"Could not generate PDBQT intermediate file from enumeration {enumeration.get_index_string()} - skipped.", + _LE.DEBUG, + ) + continue + + # also store all the paths in case it succeeded -> these will be used later, failures will be ignored + tmp_output_dirs.append(cur_tmp_output_dir) + tmp_input_paths.append(cur_tmp_input_pdbqt) + tmp_output_paths.append(cur_tmp_output_sdf) + enumeration_ids.append(enumeration.get_index_string()) + + return tmp_output_dirs, tmp_input_paths, tmp_output_paths, enumeration_ids + + def _execute_autodockvina(self): + # get number of sublists in batch and initialize Parallelizer + adv_parallelizer = Parallelizer(func=self._run_subjob) + + # continue until everything is successfully done or number of retries have been exceeded + while self._subtask_container.done() is False: + 
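+            # NOTE (added commentary, inferred from how SubtaskContainer is used here):
+            # every pass drains at most one sublist per available core; all drawn
+            # subtasks are pre-marked as "failed" and only flipped back to "success"
+            # once their output has been parsed, so crashed dockings are re-queued
+            # until the failure policy's "n_tries" is exhausted.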
next_batch = self._get_sublists(get_first_n_lists=self._get_number_cores()) + + # generate paths and initialize molecules (so that if they fail, this can be covered) + ( + tmp_output_dirs, + tmp_input_paths, + tmp_output_paths, + enumeration_ids, + ) = self._generate_temporary_input_output_files(next_batch) + + # execute the current batch in parallel; hand over lists of parameters (will be handled by Parallelizer) + # also increment the tries and set the status to "failed" (don't do that inside subprocess, as data is + # copied, not shared!) + _ = [sub.increment_tries() for element in next_batch for sub in element] + _ = [sub.set_status_failed() for element in next_batch for sub in element] + adv_parallelizer.execute_parallel( + input_path_pdbqt=tmp_input_paths, output_path_sdf=tmp_output_paths + ) + + # parse the output of that particular batch and remove temporary files + self._parse_adv_output_batch( + tmp_input_paths=tmp_input_paths, + tmp_output_paths=tmp_output_paths, + enumeration_ids=enumeration_ids, + next_batch=next_batch, + ) + + # clean-up + self._remove_temporary(tmp_output_dirs) + + # print the progress for this execution + self._log_execution_progress() + + def _parse_adv_output_batch( + self, + tmp_input_paths: List[str], + tmp_output_paths: List[str], + enumeration_ids: List[str], + next_batch: List[List[Subtask]], + ): + + for i in range(len(next_batch)): + subtask = next_batch[i][0] + tmp_output_path = tmp_output_paths[i] + tmp_input_path = tmp_input_paths[i] + enumeration_id = enumeration_ids[i] + grid_id = self.adv_additional.grid_ids[0] + grid_path = self.adv_additional.configuration.receptor_path + + # this is a protection against the case where empty (file size == 0 bytes) files are generated due to + # a failure during docking + if ( + not os.path.isfile(tmp_output_path) + or os.path.getsize(tmp_output_path) == 0 + ): + continue + + mol_supplier = Chem.SDMolSupplier(tmp_output_path, removeHs=False) + for mol in mol_supplier: + if mol is None: + continue + cur_enumeration_name = str(mol.GetProp("_Name")) + + # add the information on the actual grid used + mol.SetProp(_SBE.ANNOTATION_GRID_ID, str(grid_id)) + mol.SetProp(_SBE.ANNOTATION_GRID_PATH, str(grid_path)) + mol.SetProp(_SBE.ANNOTATION_GRID_FILENAME, os.path.basename(grid_path)) + + # if no docking score is attached (i.e. 
the molecule is a receptor or so, skip it) + if self._set_docking_score(mol) is not True: + continue + + # add molecule to the appropriate ligand + for compound in self.get_compounds(): + for enumeration in compound: + if enumeration.get_index_string() == enumeration_id: + new_conformer = Conformer( + conformer=mol, + conformer_id=None, + enumeration_object=enumeration, + ) + enumeration.add_conformer(new_conformer, auto_update=True) + subtask.set_status_success() + break + + def _delay_file_system(self, path) -> bool: + return self._wait_until_file_generation( + path=path, interval_sec=2, maximum_sec=10 + ) + + def _run_subjob(self, input_path_pdbqt: str, output_path_sdf: str): + + config = self.adv_additional.configuration + + # set up arguments list and execute + _, tmp_pdbqt_docked = gen_tmp_file( + suffix=".pdbqt", dir=os.path.dirname(input_path_pdbqt) + ) + arguments = [ + _ADE.VINA_RECEPTOR, + config.receptor_path, + _ADE.VINA_LIGAND, + input_path_pdbqt, + _ADE.VINA_CPU, + str(1), + _ADE.VINA_SEED, + config.seed, + _ADE.VINA_OUT, + tmp_pdbqt_docked, + _ADE.VINA_CENTER_X, + str(config.search_space.center_x), + _ADE.VINA_CENTER_Y, + str(config.search_space.center_y), + _ADE.VINA_CENTER_Z, + str(config.search_space.center_z), + _ADE.VINA_SIZE_X, + str(config.search_space.size_x), + _ADE.VINA_SIZE_Y, + str(config.search_space.size_y), + _ADE.VINA_SIZE_Z, + str(config.search_space.size_z), + _ADE.VINA_NUM_MODES, + config.number_poses, + ] + + execution_result = self._backend_executor.execute( + command=_ADE.VINA, arguments=arguments, check=True + ) + self._delay_file_system(path=tmp_pdbqt_docked) + + # translate the parsed output PDBQT into an SDF + arguments = [ + tmp_pdbqt_docked, + _OBE.OBABEL_INPUTFORMAT_PDBQT, + _OBE.OBABEL_OUTPUT_FORMAT_SDF, + "".join([_OBE.OBABEL_O, output_path_sdf]), + ] + self._openbabel_executor.execute( + command=_OBE.OBABEL, arguments=arguments, check=False + ) + self._delay_file_system(path=output_path_sdf) + + def execute(self): + # Note: This step only supports one grid at a time, ensemble docking is taken care of at the workflow level! 
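+        # For orientation, _run_subjob above assembles a call roughly equivalent to
+        # the following command line (a sketch - the exact flag spellings come from
+        # AutoDockVinaEnum; angle brackets mark per-job placeholders):
+        #   vina --receptor <receptor.pdbqt> --ligand <input.pdbqt> --cpu 1 --seed 42 \
+        #        --out <docked.pdbqt> --center_x <x> --center_y <y> --center_z <z> \
+        #        --size_x 15.0 --size_y 15.0 --size_z 15.0 --num_modes <number_poses>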
+ + # in order to be able to efficiently execute ADV on the enumeration level, all of them have to be unrolled + # Note: As they retain their respective Compound object, the attribution later on is simple + all_enumerations = [] + for compound in self.get_compounds(): + all_enumerations = all_enumerations + compound.get_enumerations() + for enumeration in compound: + enumeration.clear_conformers() + + # split into sublists, according to the settings + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(all_enumerations) + + # execute ADV + self._execute_autodockvina() diff --git a/icolos/core/workflow_steps/autodockvina/target_preparation.py b/icolos/core/workflow_steps/autodockvina/target_preparation.py new file mode 100644 index 0000000..84f3ab9 --- /dev/null +++ b/icolos/core/workflow_steps/autodockvina/target_preparation.py @@ -0,0 +1,137 @@ +from pydantic import BaseModel +from rdkit import Chem + +from icolos.utils.enums.program_parameters import OpenBabelEnum +from icolos.utils.enums.step_enums import StepAutoDockVinaTargetPreparationEnum +from icolos.utils.execute_external.autodockvina import AutoDockVinaExecutor +from icolos.utils.execute_external.openbabel import OpenBabelExecutor +from icolos.utils.general.icolos_exceptions import StepFailed + +from icolos.core.workflow_steps.step import _LE, StepBase + +_STE = StepAutoDockVinaTargetPreparationEnum() +_OBE = OpenBabelEnum() + + +class ADVExtractBoxTP(BaseModel): + reference_ligand_path: str = None + reference_ligand_format: str = _STE.EXTRACT_BOX_REFERENCE_LIGAND_FORMAT_PDB + + +class ADVAdditionalTP(BaseModel): + pH: float = ( + 7.4 # set target pH value that determines the protein's side-chain states + ) + input_receptor_pdb: str = None + output_receptor_pdbqt: str = None + extract_box: ADVExtractBoxTP = ADVExtractBoxTP() + + +class StepAutoDockVinaTargetPreparation(StepBase, BaseModel): + _openbabel_executor: OpenBabelExecutor = None + adv_additional: ADVAdditionalTP = None + + def __init__(self, **data): + super().__init__(**data) + + # initialize the executor and test availability + self._initialize_backend(executor=AutoDockVinaExecutor) + self._check_backend_availability() + + # initialize the executor for all "OpenBabel" + self._openbabel_executor = OpenBabelExecutor() + if not self._openbabel_executor.is_available(): + raise StepFailed( + "AutoDock Vina requires OpenBabel execution, initialization failed." + ) + + # set ADV specific settings and ensure that each molecule gets its own sublist + self.adv_additional = ADVAdditionalTP(**self.settings.additional) + + def _export_as_pdb2pdbqt(self): + # Note: In contrast to the ligand preparation, we will not use a tree-based flexibility treatment here - thus, + # the option "-xr" is used. Partial charges of the receptor are not used in AutoDock Vina. 
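+        # The argument list below corresponds roughly to this invocation (a sketch,
+        # assuming the usual OpenBabel flag spellings behind the enum members):
+        #   obabel -ipdb <receptor.pdb> -opdbqt -O<receptor.pdbqt> -xr -p 7.4 \
+        #          --partialcharge gasteiger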
+        arguments = [ +            " ".join( +                [_OBE.OBABEL_INPUTFORMAT_PDB, self.adv_additional.input_receptor_pdb] +            ), +            _OBE.OBABEL_OUTPUT_FORMAT_PDBQT, +            " ".join([_OBE.OBABEL_O, self.adv_additional.output_receptor_pdbqt]), +            "".join([_OBE.OBABEL_X, _OBE.OBABEL_X_R]), +            _OBE.OBABEL_P, +            str(self.adv_additional.pH), +            _OBE.OBABEL_PARTIALCHARGE, +            _OBE.OBABEL_PARTIALCHARGE_GASTEIGER, +        ] +        self._openbabel_executor.execute( +            command=_OBE.OBABEL, arguments=arguments, check=True +        ) +        self._logger.log( +            f"Exported target as PDBQT file {self.adv_additional.output_receptor_pdbqt}.", +            _LE.INFO, +        ) + +    def _log_extract_box(self): +        x_coords, y_coords, z_coords = self._extract_box() +        if x_coords is not None: + +            def dig(value): +                return round(value, ndigits=2) + +            self._logger.log( +                "Calculating ligand dimensions for AutoDock Vina docking protocol.", +                _LE.INFO, +            ) +            self._logger.log( +                f"Ligand ({self.adv_additional.extract_box.reference_ligand_path}):", +                _LE.INFO, +            ) +            self._logger_blank.log( +                f"X coordinates: min={dig(min(x_coords))}, max={dig(max(x_coords))}, mean={dig(sum(x_coords) / len(x_coords))}", +                _LE.INFO, +            ) +            self._logger_blank.log( +                f"Y coordinates: min={dig(min(y_coords))}, max={dig(max(y_coords))}, mean={dig(sum(y_coords) / len(y_coords))}", +                _LE.INFO, +            ) +            self._logger_blank.log( +                f"Z coordinates: min={dig(min(z_coords))}, max={dig(max(z_coords))}, mean={dig(sum(z_coords) / len(z_coords))}", +                _LE.INFO, +            ) + +    def _extract_box(self): +        # extracts box suggestions from a reference ligand, which can be added to an AutoDock Vina run +        # load the reference file (PDB or SDF) +        ref_format = self.adv_additional.extract_box.reference_ligand_format.upper() +        if ref_format == _STE.EXTRACT_BOX_REFERENCE_LIGAND_FORMAT_PDB: +            ref_mol = Chem.MolFromPDBFile( +                self.adv_additional.extract_box.reference_ligand_path, sanitize=True +            ) +        elif ref_format == _STE.EXTRACT_BOX_REFERENCE_LIGAND_FORMAT_SDF: +            mol_supplier = Chem.SDMolSupplier( +                self.adv_additional.extract_box.reference_ligand_path +            ) +            for mol in mol_supplier: +                if mol is None: +                    raise StepFailed( +                        f"Could not load molecule from {self.adv_additional.extract_box.reference_ligand_path} - abort." +                    ) +                ref_mol = mol +                break +        else: +            raise StepFailed( +                f"Reference ligand format {ref_format} not supported, use PDB or SDF instead - abort."
+ ) + + # extract coordinates + x_coords = [atom[0] for atom in ref_mol.GetConformer(0).GetPositions()] + y_coords = [atom[1] for atom in ref_mol.GetConformer(0).GetPositions()] + z_coords = [atom[2] for atom in ref_mol.GetConformer(0).GetPositions()] + return x_coords, y_coords, z_coords + + def execute(self): + # translate input PDB file into output PDBQT file + self._export_as_pdb2pdbqt() + + # extract and log the "box" dimensions based on the reference ligand + self._log_extract_box() diff --git a/icolos/core/workflow_steps/calculation/__init__.py b/icolos/core/workflow_steps/calculation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/calculation/base.py b/icolos/core/workflow_steps/calculation/base.py new file mode 100644 index 0000000..0e99f61 --- /dev/null +++ b/icolos/core/workflow_steps/calculation/base.py @@ -0,0 +1,52 @@ +import numpy as np +import pandas as pd + +from pydantic import BaseModel +from rdkit.Chem import AllChem +from typing import List + +from icolos.core.containers.compound import Conformer + +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.step_enums import StepRMSFilterEnum + +_SRF = StepRMSFilterEnum() + + +class StepCalculationBase(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + def _get_rms_method(self): + # there are two modes for the execution: "best" is better, but sometimes has performance issues + # for larger molecules + if self.settings.additional[_SRF.METHOD] == _SRF.METHOD_ALIGNMOL: + return AllChem.AlignMol + elif self.settings.additional[_SRF.METHOD] == _SRF.METHOD_BEST: + return AllChem.GetBestRMS + else: + raise ValueError( + f"RMS mode {self.settings.arguments.parameters[_SRF.METHOD]} not supported (either {_SRF.METHOD_ALIGNMOL} or {_SRF.METHOD_BEST})." 
+ ) + + @staticmethod + def _get_property_values(conformers: List[Conformer], prop: str) -> List[float]: + return [float(conf.get_molecule().GetProp(prop)) for conf in conformers] + + @staticmethod + def _calculate_rms_matrix( + conformers: List[Conformer], rms_method, decimals=3 + ) -> pd.DataFrame: + n_conf = len(conformers) + df_rms = pd.DataFrame(np.nan, index=range(n_conf), columns=range(n_conf)) + np.fill_diagonal(df_rms.values, 0) + + for i in range(n_conf - 1): + for j in range(i + 1, n_conf): + df_rms.iloc[i, j] = df_rms.iloc[j, i] = np.round( + rms_method( + conformers[i].get_molecule(), conformers[j].get_molecule() + ), + decimals=decimals, + ) + return df_rms diff --git a/icolos/core/workflow_steps/calculation/boltzmann_weighting.py b/icolos/core/workflow_steps/calculation/boltzmann_weighting.py new file mode 100644 index 0000000..581c0cd --- /dev/null +++ b/icolos/core/workflow_steps/calculation/boltzmann_weighting.py @@ -0,0 +1,98 @@ +from copy import deepcopy + +import numpy as np +from typing import List + +from pydantic import BaseModel + +from icolos.core.containers.compound import Enumeration, Conformer + +from icolos.utils.enums.step_enums import StepBoltzmannWeightingEnum +from icolos.core.workflow_steps.calculation.base import StepCalculationBase + +from icolos.utils.general.convenience_functions import * +from icolos.utils.constants import * + +_SBWE = StepBoltzmannWeightingEnum() + + +class StepBoltzmannWeighting(StepCalculationBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + def _get_relative_energy_values( + self, conformers: List[Conformer], property_name: str + ) -> List[float]: + values = [float(c.get_molecule().GetProp(property_name)) for c in conformers] + min_val = min(values) + relative_values = [value - min_val for value in values] + return relative_values + + def _obtain_factors(self, relative_values: List[float]) -> List[float]: + # calculate individual Boltzmann factors + individual_factors = [ + np.exp((-1 * val / (CONSTANT_KB * CONSTANT_T))) for val in relative_values + ] + + # calculate and return Boltzmann factors + sum_factors = sum(individual_factors) + factors = [val / sum_factors for val in individual_factors] + return factors + + def _calculate_Boltzmann_factors( + self, enumeration: Enumeration, parameters: dict + ) -> List[str]: + list_properties = parameters[_SBWE.PROPERTIES] + list_output_names = [] + for prop in list_properties: + # (1) get the relative values for this property (e.g. 
solvent) for all conformers in respect to the one + # with the minimal energy + relative_prop_values = self._get_relative_energy_values( + conformers=enumeration.get_conformers(), + property_name=prop[_SBWE.PROPERTIES_INPUT], + ) + + # (2) calculate the Boltzmann factors for this property + boltzmann_factors = self._obtain_factors( + relative_values=relative_prop_values + ) + + # (3) add the Boltzmann factors to the conformers as a tag + for c, bm_factor in zip(enumeration.get_conformers(), boltzmann_factors): + c.get_molecule().SetProp(prop[_SBWE.PROPERTIES_OUTPUT], str(bm_factor)) + list_output_names.append(prop[_SBWE.PROPERTIES_OUTPUT]) + return list_output_names + + def _do_Boltzmann_weighting(self, conformers: List[Conformer], weightings: dict): + input_tags = weightings[_SBWE.WEIGHT_INPUT] + output_prefix = nested_get( + weightings, _SBWE.WEIGHT_OUTPUT_PREFIX, default="bf_weighted" + ) + properties = weightings[_SBWE.WEIGHT_PROPERTIES] + for prop in properties: + for inp_tag in input_tags: + new_tag_name = "_".join([output_prefix, inp_tag, prop]) + products = [] + for conformer in conformers: + conf = conformer.get_molecule() + products.append( + float(conf.GetProp(prop)) * float(conf.GetProp(inp_tag)) + ) + for conformer in conformers: + conformer.get_molecule().SetProp(new_tag_name, str(sum(products))) + + def execute(self): + parameters = deepcopy(self.settings.arguments.parameters) + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if self._input_object_empty(enumeration): + continue + + # get the name of the Boltzmann properties / solvents and annotate the factors + _ = self._calculate_Boltzmann_factors(enumeration, parameters) + + # for each property and each weighting, add the respective tags + self._do_Boltzmann_weighting( + conformers=enumeration.get_conformers(), + weightings=parameters[_SBWE.WEIGHT], + ) diff --git a/icolos/core/workflow_steps/calculation/clustering.py b/icolos/core/workflow_steps/calculation/clustering.py new file mode 100644 index 0000000..ae0f99e --- /dev/null +++ b/icolos/core/workflow_steps/calculation/clustering.py @@ -0,0 +1,140 @@ +import pandas as pd +from typing import List, Tuple + +from pydantic import BaseModel + +from icolos.core.containers.compound import Conformer + +from icolos.utils.enums.step_enums import StepClusteringEnum +from icolos.core.workflow_steps.step import _LE +from icolos.core.workflow_steps.calculation.base import StepCalculationBase + +from sklearn.cluster import KMeans + +_SC = StepClusteringEnum() + + +class StepClustering(StepCalculationBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + # extend parameters + if _SC.N_CLUSTERS not in self.settings.arguments.parameters.keys(): + self.settings.arguments.parameters[_SC.N_CLUSTERS] = 3 + if _SC.MAX_ITER not in self.settings.arguments.parameters.keys(): + self.settings.arguments.parameters[_SC.MAX_ITER] = 300 + if _SC.TOP_N_PER_SOLVENT not in self.settings.additional.keys(): + self.settings.additional[_SC.TOP_N_PER_SOLVENT] = 3 + + def _get_nclusters_and_top_n(self, len_conformers: int) -> Tuple[int, int]: + n_clusters = self.settings.arguments.parameters[_SC.N_CLUSTERS] + if n_clusters > len_conformers: + n_clusters = len_conformers + self._logger.log( + f"Set number of clusters to {n_clusters} because not enough observations were provided.", + _LE.DEBUG, + ) + top_n_per_solvent = self.settings.additional[_SC.TOP_N_PER_SOLVENT] + if top_n_per_solvent > len_conformers: + top_n_per_solvent = len_conformers + 
self._logger.log( + f'Set number of "top_N_per_solvent" to {top_n_per_solvent} because not enough observations were provided.', + _LE.DEBUG, + ) + return n_clusters, top_n_per_solvent + + def _generate_feature_dataframe(self, conformers: List[Conformer]) -> pd.DataFrame: + features = self.settings.additional[_SC.FEATURES] + df_features = pd.DataFrame(columns=features) + for conf in conformers: + new_row = {} + for feature in features: + new_row[feature] = float(conf.get_molecule().GetProp(feature)) + df_features = df_features.append(new_row, ignore_index=True) + return df_features + + def _get_representative_conformers( + self, cluster_set: List[Tuple[int, Conformer]] + ) -> List[int]: + # for each selection (e.g. solvent), obtain the N top conformers (note, that the input is already clustered) + # also get rid of duplicates in the indices + rep_indices = [] + for solvent_key in self.settings.additional[_SC.FREE_ENERGY_SOLVENT_TAGS]: + conf_indices = [tuple_conf[0] for tuple_conf in cluster_set] + solvent_dGs = [ + float(tuple_conf[1].get_molecule().GetProp(solvent_key)) + for tuple_conf in cluster_set + ] + + # sort list of global indices for this cluster according to their free energy for this solvent + # note: from lowest (most negative) -> highest + conf_indices_sorted = [ + idx for _, idx in sorted(zip(solvent_dGs, conf_indices)) + ] + rep_indices = ( + rep_indices + + conf_indices_sorted[ + 0 : min( + len(conf_indices), + self.settings.additional[_SC.TOP_N_PER_SOLVENT], + ) + ] + ) + return list(set(rep_indices)) + + def _cluster_conformers(self, conformers: List[Conformer]) -> List[Conformer]: + # make sure the number of clusters specified and "N top per solvent" are not higher than the compound number + n_clusters, top_n_per_solvent = self._get_nclusters_and_top_n( + len_conformers=len(conformers) + ) + + # initialize K-means instance + kmeans = KMeans( + n_clusters=n_clusters, + max_iter=self.settings.arguments.parameters[_SC.MAX_ITER], + init="k-means++", + n_init=10, + tol=1e-04, + random_state=0, + ) + + # generate dataframe with selected properties + df_features = self._generate_feature_dataframe(conformers=conformers) + + # predict cluster and assign to conformer + cluster_labels = kmeans.fit_predict(df_features) + keep_indices = [] + for cluster_label in range(n_clusters): + # keep the "global" index to select the appropriate conformers later + cluster_set = [ + (i, conformers[i]) + for i in range(len(conformers)) + if cluster_labels[i] == cluster_label + ] + keep_indices = keep_indices + self._get_representative_conformers( + cluster_set=cluster_set + ) + return [conformers[i] for i in range(len(conformers)) if i in keep_indices] + + def execute(self): + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if len(enumeration.get_conformers()) == 0: + continue + + number_conformers_before = len(enumeration) + + # cluster conformers on the enumeration level + clustered_conformers = self._cluster_conformers( + conformers=enumeration.get_conformers() + ) + + # add clustered conformers to enumeration + enumeration.clear_conformers() + for conf in clustered_conformers: + enumeration.add_conformer(conformer=conf, auto_update=True) + number_conformers_after = len(enumeration) + self._logger.log( + f"Clustered {number_conformers_before} into {number_conformers_after} conformers for enumeration {enumeration.get_index_string()}.", + _LE.INFO, + ) diff --git a/icolos/core/workflow_steps/calculation/cosmo.py 
b/icolos/core/workflow_steps/calculation/cosmo.py new file mode 100644 index 0000000..2fa112a --- /dev/null +++ b/icolos/core/workflow_steps/calculation/cosmo.py @@ -0,0 +1,311 @@ +import os +import tempfile +from typing import Tuple, List +from copy import deepcopy + +from pydantic import BaseModel + +from icolos.utils.execute_external.turbomole import TurbomoleExecutor + +from icolos.core.containers.compound import Conformer, Enumeration + +from icolos.utils.enums.program_parameters import TurbomoleEnum +from icolos.utils.enums.program_parameters import CosmoOutputEnum +from icolos.utils.enums.compound_enums import ConformerContainerEnum +from icolos.core.workflow_steps.calculation.base import StepCalculationBase +from icolos.core.workflow_steps.step import _LE +from icolos.loggers.logger_utils import log_multiline_string +from icolos.utils.general.files_paths import attach_root_path + +_EE = TurbomoleEnum() +_CTE = ConformerContainerEnum() +_COE = CosmoOutputEnum() + + +class StepCosmo(StepCalculationBase, BaseModel): +    """Step that executes Cosmo. + +    Note, that the execution (especially in conjunction with a preceding turbomole step) is relatively complex. +    (1) Take the coord file from the additional data attached to the conformers, +    (2) run Cosmo, +    (3) extract the final XYZ snapshot with t2x, +    (4) translate it to an SDF file with obabel and +    (5) combine the new coordinates with the tags.""" + +    def __init__(self, **data): +        super().__init__(**data) + +        # initialize the executor and test availability +        # as they are linked, use a "TurbomoleExecutor" here +        self._initialize_backend(executor=TurbomoleExecutor) +        self._check_backend_availability() + +    def _prepare_tmp_input_directories( +        self, enumeration: Enumeration +    ) -> Tuple[List[str], List[str], List[str], List[str]]: +        tmp_dirs = [] +        paths_input_cosmofile = [] +        paths_config_cosmotherm = [] +        paths_output_cosmotherm = [] +        for conformer in enumeration: +            # 1) generate all temporary paths +            tmp_dir = tempfile.mkdtemp() +            path_input_cosmofile = os.path.join(tmp_dir, _EE.TM_OUTPUT_COSMOFILE) +            path_config_cosmofile = os.path.join(tmp_dir, _EE.CT_COSMOTHERM_CONFIG_FILE) +            path_output_cosmotherm = os.path.join( +                tmp_dir, _EE.CT_COSMOTHERM_OUTPUT_FILE +            ) + +            # 2) write-out the COSMO file +            # Note, that the generation of the COSMO files is part of the Turbomole execution. The reason is, that +            # the generation is complicated and uses a lot of input from the TM step, thus "cosmoprep" is +            # executed there. +            if _CTE.EXTRA_DATA_COSMOFILE not in conformer.get_extra_data().keys(): +                self._logger.log( +                    f"In order to write out COSMO files, the content needs to be annotated as extra data in the conformers.
Have you executed Turbomole before?", + _LE.ERROR, + ) + raise ValueError("Could not find COSMO data to write out - abort.") + with open(path_input_cosmofile, "w") as f: + f.writelines(conformer.get_extra_data()[_CTE.EXTRA_DATA_COSMOFILE]) + + # 3) add paths + tmp_dirs.append(tmp_dir) + paths_input_cosmofile.append(path_input_cosmofile) + paths_config_cosmotherm.append(path_config_cosmofile) + paths_output_cosmotherm.append(path_output_cosmotherm) + + return ( + tmp_dirs, + paths_input_cosmofile, + paths_config_cosmotherm, + paths_output_cosmotherm, + ) + + def _execute_run(self, config_path: str): + result = self._backend_executor.execute( + command=_EE.CT_COSMOTHERM, arguments=[config_path], check=True + ) + if _EE.CT_COSMOTHERM_FAIL_STRING in result.stderr: + self._logger.log( + f"Execution of {_EE.CT_COSMOTHERM} failed. Error message:", _LE.ERROR + ) + log_multiline_string( + logger=self._logger_blank, + level=_LE.ERROR, + multi_line_string=result.stdout, + ) + + def _write_config_file(self, config_path: str): + # by default use the internal configuration, but if one has been specified, use this one + # note, that the default name of the COSMO file is "mol.cosmo", so this should be used in any config file + if _EE.CT_CONFIG not in self.settings.arguments.parameters.keys(): + with open(attach_root_path(_EE.CT_CONFIG_DEFAULTPATH), "r") as f: + config = f.readlines() + self._logger.log( + f"Loaded {_EE.CT_COSMOTHERM} configuration from default file {_EE.CT_CONFIG_DEFAULTPATH}.", + _LE.DEBUG, + ) + else: + config = self.settings.arguments.parameters[_EE.CT_CONFIG] + with open(config_path, "w") as f: + f.writelines([line.rstrip("\n") + "\n" for line in config]) + + def _get_line_by_pattern(self, lines: List[str], pattern: str) -> str: + for line in lines: + if pattern in line: + return line + + def _get_values_from_line(self, line: str) -> List[str]: + try: + value_part = line.split(":")[1] + return value_part.split() + except Exception: + return [] + + def _annotate_from_output_block( + self, conformer: Conformer, block: List[str], annotation: dict + ): + for key in annotation.keys(): + # get the line with the values + line = self._get_line_by_pattern( + lines=block, pattern=annotation[key][_COE.PATTERN] + ) + if line is None: + continue + + # get the values and select the one that is to be added + try: + values = self._get_values_from_line(line=line) + value = values[annotation[key][_COE.ELEMENT]] + except IndexError: + continue + + # add it as a tag to the conformer; we can replace part of the tag name with e.g. 
the solvent + # names if we need to + conformer.get_molecule().SetProp(key, value) + + def _get_solvents_from_header(self, header: List[str]): + line_solvents = self._get_line_by_pattern( + header, pattern=_COE.SOLVENT_BLOCK_HEADER_COMPOUNDS_PATTERN + ) + return self._get_values_from_line(line_solvents) + + def _get_current_solvent_from_header(self, header: List[str]): + line_mol_fraction = self._get_line_by_pattern( + header, pattern=_COE.SOLVENT_BLOCK_HEADER_MOLFRACTION_PATTERN + ) + solvent_index = self._get_values_from_line(line_mol_fraction).index( + _COE.SOLVENT_BLOCK_CURRENT_FRACTION_VALUE + ) + return self._get_solvents_from_header(header)[solvent_index] + + def _parse_general_block(self, lines: List[str], conformer: Conformer): + general_block = [] + for index in range(len(lines)): + if _COE.GENERAL_BLOCK_PATTERN_STRING in lines[index]: + # skip the first lines after the header + index += 2 + while not lines[index] == "": + general_block.append(lines[index]) + index += 1 + break + self._annotate_from_output_block( + conformer=conformer, + block=general_block, + annotation=_COE.GENERAL_BLOCK_ANNOTATIONS, + ) + + def _load_solvent_blocks(self, lines: List[str]) -> List[dict]: + solvent_blocks = [] + index = 0 + while index < len(lines): + if _COE.SOLVENT_BLOCK_PATTERN_STRING in lines[index]: + # we need to extract both the header (which solvent?) and the body (actual values) + new_block = {"header": [], "body": []} + # go back to start of block + while ( + index >= 0 and _COE.SOLVENT_BLOCK_START_PATTERN not in lines[index] + ): + index -= 1 + + # extract the header + while _COE.SOLVENT_BLOCK_BODY_START_PATTERN not in lines[index]: + new_block["header"].append(lines[index]) + index += 1 + + # extract the body + while index < len(lines) and not ( + lines[index] == "" and lines[index + 1] == "" + ): + new_block["body"].append(lines[index]) + index += 1 + solvent_blocks.append(new_block) + index += 1 + return solvent_blocks + + def _annotate_solvent_blocks( + self, solvent_blocks: List[dict], conformer: Conformer + ): + for block_dict in solvent_blocks: + # get solvent and translate according to internal solvent abbreviation table + try: + current_solvent = self._get_current_solvent_from_header( + block_dict["header"] + ) + if current_solvent in _COE.SOLVENT_TRANSLATE_SOLVENT.keys(): + current_solvent = _COE.SOLVENT_TRANSLATE_SOLVENT[current_solvent] + except ValueError: + continue + + # overwrite the solvent name placeholder in and annotate + template_annotations = deepcopy(_COE.SOLVENT_BLOCK_BODY_ANNOTATIONS) + annotations = {} + for key in template_annotations.keys(): + new_key = key.replace(_COE.SOLVENT_REPLACEHOLDER, current_solvent) + annotations[new_key] = template_annotations[key] + + # annotate + self._annotate_from_output_block( + conformer=conformer, block=block_dict["body"], annotation=annotations + ) + + def _parse_output(self, path_output: str, conformer: Conformer): + # there are two sets of blocks we need to parse: the "general" block, that is always present and, if specified, + # free energies from solvents ("mixtures") + # 1) load the file + with open(path_output, "r") as f: + lines = f.readlines() + lines = [line.rstrip("\n") for line in lines] + + # 2) extract the general block: from the match of the pattern line until the second empty line occurs + # e.g. 
"--- Compound 1 (mol) ---\n\nAtomic weights : 111111\n ...\n\n" + self._parse_general_block(lines, conformer) + + # 3) extract the solvent blocks (if available) + # search for the first occurrence of a Gibb's free energy and expand top until the pattern line is found and + # to bottom until more than one empty line is hit; proceed until all blocks are processed + solvent_blocks = self._load_solvent_blocks(lines) + + if len(solvent_blocks) > 0: + self._annotate_solvent_blocks(solvent_blocks, conformer) + + def execute(self): + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if len(enumeration.get_conformers()) == 0: + continue + + # generate copies of the conformers, as to not accidentally manipulate them + inp_enum = deepcopy(enumeration) + + # prepare the temporary files and retrieve paths (TM config is charge-state dependent!) + ( + tmp_dirs, + paths_input_cosmofile, + paths_config_cosmotherm, + paths_output_cosmotherm, + ) = self._prepare_tmp_input_directories(enumeration=inp_enum) + + # execute individual conformers + for ( + tmp_dir, + path_config_cosmotherm, + conformer, + path_output_cosmotherm, + ) in zip( + tmp_dirs, + paths_config_cosmotherm, + enumeration.get_conformers(), + paths_output_cosmotherm, + ): + self._move_to_dir(tmp_dir) + + # set a necessary environment variable to avoid clashes + os.environ[_EE.TM_TURBOTMPDIR] = tmp_dir + + # write configuration file + self._write_config_file(config_path=path_config_cosmotherm) + + # all ready; start the execution + self._execute_run(config_path=path_config_cosmotherm) + + # parse the results + self._parse_output( + path_output=path_output_cosmotherm, conformer=conformer + ) + + # restore working directory and remove temporary files + self._restore_working_dir() + for tmp_dir in tmp_dirs: + if os.path.isdir(tmp_dir): + self._remove_temporary(tmp_dir) + + self._logger.log( + f"Executed COSMO for {len(enumeration.get_conformers())} conformers for enumeration {enumeration.get_index_string()}.", + _LE.INFO, + ) diff --git a/icolos/core/workflow_steps/calculation/electrostatics/__init__.py b/icolos/core/workflow_steps/calculation/electrostatics/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/calculation/electrostatics/cresset_ec.py b/icolos/core/workflow_steps/calculation/electrostatics/cresset_ec.py new file mode 100644 index 0000000..2955351 --- /dev/null +++ b/icolos/core/workflow_steps/calculation/electrostatics/cresset_ec.py @@ -0,0 +1,108 @@ +from copy import deepcopy +from typing import List, Tuple +from icolos.core.workflow_steps.step import StepBase +from pydantic import BaseModel +import tempfile +from icolos.utils.enums.step_enums import StepCressetEnum +from icolos.utils.execute_external.cresset_executor import CressetExecutor +from icolos.utils.general.files_paths import gen_tmp_file +from icolos.core.workflow_steps.step import _LE +import os +from icolos.utils.general.parallelization import Parallelizer, SubtaskContainer + + +_SCE = StepCressetEnum() + + +class StepCressetEC(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=CressetExecutor) + self._check_backend_availability() + + def _prepare_tmp_input(self, batch: List) -> Tuple[List, List]: + conformers = [] + tmp_dirs = [] + protein = self.data.generic.get_argument_by_extension( + "pdb", rtn_file_object=True + ) + for sublist in batch: + for task in sublist: + conformer = task.data + conformers.append(conformer) + 
+ # generate the tmpdir + tmp_dir = tempfile.mkdtemp() + tmp_dirs.append(tmp_dir) + _, path_input_sdf = gen_tmp_file( + prefix="tmp_", suffix=".sdf", dir=tmp_dir + ) + conformer.write(path=path_input_sdf) + + # write the protein to that tmpdir + protein.write(path=os.path.join(tmp_dir, "protein.pdb"), join=False) + + return conformers, tmp_dirs + + def _execute_cresset_ec_parallel(self): + parallelizer = Parallelizer(func=self._run_conformer) + n = 1 + + while self._subtask_container.done() is False: + + next_batch = self._get_sublists( + get_first_n_lists=self._get_number_cores() + ) # return n lists of length max_sublist_length + _ = [sub.increment_tries() for element in next_batch for sub in element] + _ = [sub.set_status_failed() for element in next_batch for sub in element] + + conformers, tmp_dirs = self._prepare_tmp_input(next_batch) + self._logger.log( + f"Executing Cresset EC for batch {n} containing {len(conformers)} conformers", + _LE.DEBUG, + ) + + parallelizer.execute_parallel(tmp_dir=tmp_dirs, conformer=conformers) + + results = self._parse_results(tmp_dirs, conformers) + + for sublist, result in zip(next_batch, results): + # TODO: this only works if max length sublist == 1, fine for now as that is all turbomole can handle + for task in sublist: + if result == _SCE.SUCCESS: + task.set_status_success() + else: + task.set_status_failed() + self._remove_temporary(tmp_dirs) + n += 1 + + def _parse_results(self, tmp_dirs: List, conformers: List): + # walk over the directory structure, parse the output file, identify the conformer, attach a tag to the mol object + # TODO: No idea what the output looks like for this, write the parser!! + pass + + def execute(self): + # unroll all conformers + all_conformers = [] + for compound in self.get_compounds(): + for enum in compound.get_enumerations(): + if self._input_object_empty(enum): + continue + else: + for conformer in enum.get_conformers(): + conf = deepcopy(conformer) + all_conformers.append(conf) + + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(all_conformers) + self._execute_cresset_ec_parallel() + + def _run_conformer(self): + # run a single conformer through Flare's EC + self._backend_executor.execute() + + # execution is + # module load Flare && pyflare electrostaticcomplementarity.py -p protein.pdb ligands.sdf diff --git a/icolos/core/workflow_steps/calculation/electrostatics/esp_sim.py b/icolos/core/workflow_steps/calculation/electrostatics/esp_sim.py new file mode 100644 index 0000000..a3b0c95 --- /dev/null +++ b/icolos/core/workflow_steps/calculation/electrostatics/esp_sim.py @@ -0,0 +1,152 @@ +from copy import deepcopy +import tempfile +from typing import List +from icolos.core.containers.compound import Conformer, Enumeration +from icolos.core.workflow_steps.step import StepBase +from pydantic import BaseModel + +try: + from espsim import EmbedAlignConstrainedScore +except ImportError: + print( + "WARNING - Could not import module espsim, check it is installed in your environment" + ) + +from rdkit.Chem import AllChem, Mol +from rdkit import Chem +from rdkit.Chem import rdFMCS +from icolos.utils.general.parallelization import SubtaskContainer, Parallelizer +import os + +# Based on https://github.com/hesther/espsim + + +class StepEspSim(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + def _compute_esp_sim(self, ref: Mol, trg: Enumeration, tmp_dir: str): + """ + :param ref : Reference molecule, the 
known binder against which to calculate similarity +        :param trg: Icolos enumeration of the target molecule, as a SMILES string. Embedded with RDKit +        """ +        # create mol object from the target's SMILES string +        # housekeeping for data appending later + +        trg_mol = Chem.AddHs(Chem.MolFromSmiles(trg.get_smile())) + +        # get the mol object for the max common substructure +        mcs = Chem.MolFromSmarts(rdFMCS.FindMCS([ref, trg_mol]).smartsString) +        mcs = Chem.MolToSmiles(mcs) + +        patt = Chem.MolFromSmiles(mcs, sanitize=False) +        helper = Chem.AddHs(Chem.MolFromSmiles(mcs)) + +        # Embed the MCS helper molecule, create one conformer +        AllChem.EmbedMolecule(helper, AllChem.ETKDG()) + +        # Optimize the coordinates of the conformer +        AllChem.UFFOptimizeMolecule(helper) +        core = AllChem.DeleteSubstructs( +            AllChem.ReplaceSidechains(helper, patt), Chem.MolFromSmiles("*") +        )  # Create core molecule with 3D coordinates +        core.UpdatePropertyCache() + +        args = [ref, trg_mol, core] + +        args = self._get_arguments(args) + +        simShape, simEsp = EmbedAlignConstrainedScore(*args) + +        # now attach the target molecule as a conformer and attach the scores to the mol object +        trg_conf = Conformer(conformer=trg_mol) +        trg_conf.get_molecule().SetProp("shape_sim", str(simShape[0])) +        trg_conf.get_molecule().SetProp("esp_sim", str(simEsp[0])) + +        trg_conf.write(os.path.join(tmp_dir, "conformer.sdf")) + +    def _get_arguments(self, std_args: List) -> List: + +        for flag in self.settings.arguments.flags: +            std_args.append(flag) +        for key, value in self.settings.arguments.parameters.items(): +            std_args.append(key) +            std_args.append(value) +        return std_args + +    def _prepare_batch(self, batch): +        target_enums = [] +        tmp_dirs = [] + +        for sublist in batch: +            for task in sublist: +                target_enums.append(task.data) +                tmp_dirs.append(tempfile.mkdtemp()) +        return target_enums, tmp_dirs + +    def _parse_output(self, trgs: List[Enumeration], tmp_dirs: List[str]) -> None: +        for tmp_dir, trg in zip(tmp_dirs, trgs): +            # grab the written sdf object +            sdf_path = os.path.join(tmp_dir, "conformer.sdf") +            mol_supplier = Chem.SDMolSupplier(sdf_path, removeHs=False) +            for mol in mol_supplier:  # should only be one conformer!
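+                # added note: _compute_esp_sim writes a single embedded conformer per
+                # tmp_dir, so exactly one molecule is expected here; it is re-attached
+                # to its enumeration by compound name and enumeration id below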
+                conf = Conformer(conformer=mol) +                comp = self.get_compound_by_name(trg.get_compound_name()) +                comp.find_enumeration(trg.get_enumeration_id()).add_conformer(conf) + +        self._remove_temporary(tmp_dirs) + +    def _execute_espsim_parallel(self): +        # embed the reference compound +        ref_compound = Chem.AddHs( +            Chem.MolFromSmiles(self.settings.additional["ref_smiles"]) +        ) + +        parallelizer = Parallelizer(func=self._compute_esp_sim) + +        while self._subtask_container.done() is False: +            next_batch = self._get_sublists(get_first_n_lists=self._get_number_cores()) + +            _ = [sub.increment_tries() for element in next_batch for sub in element] +            _ = [sub.set_status_failed() for element in next_batch for sub in element] + +            trgs, tmp_dirs = self._prepare_batch(next_batch) + +            refs = [ref_compound for _ in range(len(next_batch))] + +            parallelizer.execute_parallel(ref=refs, trg=trgs, tmp_dir=tmp_dirs) +            # hand over the embedded reference (computed once) and target compound (SMILES string to be embedded) +            self._parse_output(tmp_dirs=tmp_dirs, trgs=trgs) + +            for task in next_batch: +                for subtask in task: +                    # TODO: Check return codes +                    subtask.set_status_success() + +    def execute(self): +        """ +        esp-sim does molecular alignment with RDKit, then computes a Coulombic overlap integral and a +        Tanimoto similarity for shape measurement + +        The use case takes a reference compound (known binder) and compares it to REINVENT compounds + +        Usage: +        * Define the reference compound using settings.additional, as a SMILES string, to be embedded by RDKit +        * The remaining compounds are embedded using a preceding RDKit embedding +        * Attach the resulting scores to the enumeration +        """ + +        all_enums = [] +        for compound in self.get_compounds(): +            for enumeration in compound: +                all_enums.append(deepcopy(enumeration)) + +        self.execution.parallelization.max_length_sublists = 1 +        # unroll the provided compounds +        self._subtask_container = SubtaskContainer(max_tries=3) +        self._subtask_container.load_data(all_enums) +        self._execute_espsim_parallel() diff --git a/icolos/core/workflow_steps/calculation/feature_counter.py b/icolos/core/workflow_steps/calculation/feature_counter.py new file mode 100644 index 0000000..0423d00 --- /dev/null +++ b/icolos/core/workflow_steps/calculation/feature_counter.py @@ -0,0 +1,64 @@ +from rdkit.Chem import Mol +from rdkit.Chem.rdMolDescriptors import CalcNumRings, CalcNumAromaticRings +from pydantic import BaseModel + +from icolos.utils.enums.program_parameters import FeatureCounterEnum +from icolos.utils.enums.step_enums import StepFeatureCounterEnum +from icolos.core.workflow_steps.step import _LE +from icolos.core.workflow_steps.calculation.base import StepCalculationBase + +_FC = FeatureCounterEnum() +_SFC = StepFeatureCounterEnum() + + +class StepFeatureCounter(StepCalculationBase, BaseModel): +    def __init__(self, **data): +        super().__init__(**data) + +        # extend parameters with defaults +        if _SFC.LEVEL not in self.settings.additional.keys(): +            self.settings.additional[_SFC.LEVEL] = _SFC.LEVEL_CONFORMER +            self._logger.log( +                f'No operational level for feature counting specified, defaulting to "{_SFC.LEVEL_CONFORMER}".', +                _LE.INFO, +            ) + +    def _count_rings(self, mol: Mol): +        number_rings = CalcNumRings(mol) +        mol.SetProp(_FC.PROPERTY_NUM_RINGS, str(number_rings)) + +    def _count_aromatic_rings(self, mol: Mol): +        number_rings = CalcNumAromaticRings(mol) +        mol.SetProp(_FC.PROPERTY_NUM_AROMATIC_RINGS, str(number_rings)) + +    def _get_feature_method(self, feature: str): +        if feature == _FC.PROPERTY_NUM_RINGS: +            return
self._count_rings + elif feature == _FC.PROPERTY_NUM_AROMATIC_RINGS: + return self._count_aromatic_rings + else: + raise ValueError(f'Feature "{feature}" not yet supported.') + + def execute(self): + feature = self.settings.additional[_SFC.FEATURE].lower() + feature_method = self._get_feature_method(feature=feature) + level = self.settings.additional[_SFC.LEVEL] + mol_count = 0 + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if level == _SFC.LEVEL_ENUMERATION: + mol = enumeration.get_molecule() + if mol is not None: + feature_method(mol) + mol_count = mol_count + 1 + elif level == _SFC.LEVEL_CONFORMER: + for conformer in enumeration.get_conformers(): + mol = conformer.get_molecule() + if mol is not None: + feature_method(mol) + mol_count = mol_count + 1 + else: + raise ValueError(f'Level "{level}" not supported.') + self._logger.log( + f'Counted feature "{feature}" for {mol_count} molecules.', _LE.INFO + ) diff --git a/icolos/core/workflow_steps/calculation/panther.py b/icolos/core/workflow_steps/calculation/panther.py new file mode 100644 index 0000000..c3d8752 --- /dev/null +++ b/icolos/core/workflow_steps/calculation/panther.py @@ -0,0 +1,152 @@ +from icolos.core.containers.generic import GenericData +import os +import tempfile +import re +import numpy as np +from copy import deepcopy +from typing import List + +from icolos.core.workflow_steps.calculation.base import StepCalculationBase +from icolos.utils.enums.program_parameters import PantherEnum +from icolos.utils.enums.step_enums import StepPantherEnum +from icolos.utils.execute_external.execute import Executor +from icolos.core.workflow_steps.step import _LE +from pydantic import BaseModel +from icolos.utils.general.files_paths import attach_root_path + +_SPE = ( + StepPantherEnum() +) # hold the constants to access the relevant value from initialised **data +_PE = PantherEnum() # hold the program settings + + +class StepPanther(StepCalculationBase, BaseModel): + + negative_images: List = [] + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=Executor) + + def _prepare_tmp_input_dir(self): + tmp_dir = tempfile.mkdtemp() + return tmp_dir + + def _write_panther_config_file(self, tmp_dir): + if not self.settings.additional[_SPE.PANTHER_CONFIG_FILE]: + self._logger.log("No config file specified, using default.", _LE.INFO) + panther_config = attach_root_path( + "/icolos/config/panther/default_panther.in" + ) + + elif not os.path.isfile(self.settings.additional[_SPE.PANTHER_CONFIG_FILE]): + self._logger.log( + f"File not found for the provided panther config file path: {self.settings.additional[_SPE.PANTHER_CONFIG_FILE]}", + _LE.ERROR, + ) + raise FileNotFoundError( + f"The specified panther config file was not found {self.settings.additional[_SPE.PANTHER_CONFIG_FILE]}" + ) + + else: + panther_config = self.settings.additional[_SPE.PANTHER_CONFIG_FILE] + + with open(panther_config, "r") as f: + panther_config = f.read() + + # add the parameter absolute paths to the angle, etc. 
file specifications + update_dictionary = deepcopy(self.settings.additional[_SPE.FIELDS]) + update_dictionary = self._add_ligand_centroid_coordinates(update_dictionary) + update_dictionary = self._add_parameter_locations_to_replacement_fields( + update_dictionary + ) + # update the configuration and write it to a file + panther_config = self._modify_panther_config_file( + panther_config, update_dictionary + ) + + with open(os.path.join(tmp_dir, "panther_config.in"), "w") as f: + f.write(panther_config) + + def _add_parameter_locations_to_replacement_fields( + self, update_dictionary: dict + ) -> dict: + # in case not specified (which is the main use case), use the default libraries for charges etc. that should + # reside in the same folder as the python entry-point "panther.py"; setting absolute paths here, allows to + # execute PANTHER for any input in any given folder + for key, value in _SPE.FIELDS_PARAMETERS_LIB.items(): + if key not in update_dictionary.keys(): + update_dictionary[key] = os.path.join( + self.settings.additional[_SPE.PANTHER_LOCATION], value + ) + return update_dictionary + + def _add_ligand_centroid_coordinates(self, update_dict: dict) -> dict: + coordinates = self._calculate_ligand_centroid( + self.settings.additional[_SPE.FIELDS][_SPE.FIELD_KEY_PDB_FILE] + ) + update_dict[_SPE.FIELD_KEY_COORDINATES] = coordinates + return update_dict + + def _calculate_ligand_centroid(self, file): + with open(file, "r") as f: + file_lines = f.readlines() + file_lines = [ + line for line in file_lines if "X 0" in line and len(line.split()) > 5 + ] + + if file_lines == []: + self._logger.log( + "No lines corresponding to the ligand found! Centroid will not be correct", + _LE.WARNING, + ) + a = np.genfromtxt(file_lines, usecols=[6, 7, 8], skip_header=1) + avg = list(a.mean(axis=0)) + avg = [str(i) for i in avg] + return " ".join(avg) + + def _modify_panther_config_file( + self, config_file: str, update_dictionary: dict + ) -> str: + for key, value in update_dictionary.items(): + pattern = fr"({key}.*:: ).*" + pattern = re.compile(pattern) + config_file = re.sub(pattern, fr"\1 {value}", config_file) + return config_file + + def _execute_backend(self, tmp_dir): + arguments = [ + os.path.join( + self.settings.additional[_SPE.PANTHER_LOCATION], _PE.PANTHER_ENTRYPOINT + ), + os.path.join(tmp_dir, _PE.PANTHER_CONFIG), + os.path.join(tmp_dir, _PE.PANTHER_OUTPUT_FILE), + ] + self._backend_executor.execute( + command=_PE.PANTHER_PTYHON2, arguments=arguments, check=True + ) + + def _parse_panther_output(self, tmp_dir): + try: + with open(os.path.join(tmp_dir, _PE.PANTHER_OUTPUT_FILE), "r") as f: + data = f.read() + self.data.generic.add_file( + GenericData(file_name=_PE.PANTHER_OUTPUT_FILE, file_data=data) + ) + except FileNotFoundError: + self._logger.log( + f"No panther output file was produced for step {self.step_id}, subsequent steps that depend on the negative image will fail.", + _LE.WARNING, + ) + + def execute(self): + tmp_dir = self._prepare_tmp_input_dir() + self._write_panther_config_file(tmp_dir) + self._execute_backend(tmp_dir) + self._logger.log("Executed PANTHER and obtained negative image.", _LE.INFO) + self._logger.log( + f"Calculated negative image for configuration file in {tmp_dir}.", _LE.DEBUG + ) + self._parse_panther_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/calculation/rms_filter.py b/icolos/core/workflow_steps/calculation/rms_filter.py new file mode 100644 index 0000000..a150f19 --- /dev/null +++ 
b/icolos/core/workflow_steps/calculation/rms_filter.py @@ -0,0 +1,97 @@ +import pandas as pd +from typing import List +from pydantic import BaseModel + +from icolos.core.containers.compound import Conformer + +from icolos.utils.enums.step_enums import StepRMSFilterEnum +from icolos.core.workflow_steps.step import _LE +from icolos.core.workflow_steps.calculation.base import StepCalculationBase + +_SRF = StepRMSFilterEnum() + + +class StepRMSFilter(StepCalculationBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + # extend parameters + if _SRF.THRESHOLD not in self.settings.additional.keys(): + self.settings.additional[_SRF.THRESHOLD] = 1 + if _SRF.METHOD not in self.settings.additional.keys(): + self.settings.additional[_SRF.METHOD] = _SRF.METHOD_ALIGNMOL + if _SRF.ORDER_BY not in self.settings.additional.keys(): + self.settings.additional[_SRF.ORDER_BY] = None + else: + if _SRF.ORDER_ASCENDING not in self.settings.additional.keys(): + self._logger.log( + 'Setting order ascending not specified, setting to "True" (default).', + _LE.WARNING, + ) + self.settings.additional[_SRF.ORDER_ASCENDING] = False + + def _get_representative_indices( + self, df_rms: pd.DataFrame, prop_values: List[float] + ) -> List[int]: + keep_indices = [] + prop_idx = list(zip(prop_values, list(range(len(prop_values))))) + threshold = self.settings.additional[_SRF.THRESHOLD] + while len(prop_idx) > 0: + # get the best (according to the property) element's index, add it to the list and remove it from + # the remaining ones + if self.settings.additional[_SRF.ORDER_BY] is not None: + prop_idx = [ + (prop, idx) + for prop, idx in sorted( + prop_idx, reverse=self.settings.additional[_SRF.ORDER_ASCENDING] + ) + ] + cur_best_idx = prop_idx[0][1] + keep_indices.append(cur_best_idx) + del prop_idx[0] + + # remove all, that are fulfilling the RMS threshold + for i in reversed(range(len(prop_idx))): + comp_idx = prop_idx[i][1] + cur_rms = df_rms.iloc[cur_best_idx, comp_idx] + if cur_rms <= threshold: + del prop_idx[i] + return keep_indices + + def _filter_conformers(self, conformers: List[Conformer]) -> List[Conformer]: + # to select the "best" conformers, here the property to use for ordering / ranking is specified + order_by = self.settings.additional[_SRF.ORDER_BY] + if order_by is not None: + prop_values = self._get_property_values(conformers, order_by) + else: + prop_values = [None for _ in range(len(conformers))] + + # generate RMS matrix (NxN, where N is the number of conformers) + df_rms = self._calculate_rms_matrix(conformers, self._get_rms_method()) + + keep_indices = self._get_representative_indices(df_rms, prop_values) + + return [conformers[i] for i in range(len(conformers)) if i in keep_indices] + + def execute(self): + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if self._input_object_empty(enumeration): + continue + + number_conformers_before = len(enumeration) + + # filter conformers on the enumeration level + filtered_conformers = self._filter_conformers( + conformers=enumeration.get_conformers() + ) + + # add filtered conformers to enumeration + enumeration.clear_conformers() + for conf in filtered_conformers: + enumeration.add_conformer(conformer=conf, auto_update=True) + number_conformers_after = len(enumeration) + self._logger.log( + f"Filtered {number_conformers_before} conformers down to {number_conformers_after} for enumeration {enumeration.get_index_string()}.", + _LE.INFO, + ) diff --git 
a/icolos/core/workflow_steps/calculation/rmsd.py b/icolos/core/workflow_steps/calculation/rmsd.py new file mode 100644 index 0000000..48fa565 --- /dev/null +++ b/icolos/core/workflow_steps/calculation/rmsd.py @@ -0,0 +1,47 @@ +from typing import List +from pydantic import BaseModel + +from icolos.core.containers.compound import Conformer, unroll_conformers +from icolos.utils.enums.step_enums import StepRMSDEnum, StepDataManipulationEnum +from icolos.core.workflow_steps.step import _LE +from icolos.core.workflow_steps.calculation.base import StepCalculationBase + +_SR = StepRMSDEnum() +_SDM = StepDataManipulationEnum() + + +class StepRMSD(StepCalculationBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + # extend parameters + if _SR.METHOD not in self.settings.additional.keys(): + self.settings.additional[_SR.METHOD] = _SR.METHOD_ALIGNMOL + + def _calculate_RMSD(self, conformers: List[Conformer]): + for conf in conformers: + rmsd_matrix = self._calculate_rms_matrix( + conformers=[conf] + conf.get_extra_data()[_SDM.KEY_MATCHED], + rms_method=self._get_rms_method(), + ) + + # use the specified tag name if it is the first value and append an index in case there are more + for idx, col in enumerate(rmsd_matrix.columns[1:]): + combined_tag = "".join([_SR.RMSD_TAG, "" if idx == 0 else str(idx)]) + rmsd_value = rmsd_matrix.iloc[[0]][col][0] + conf.get_molecule().SetProp(combined_tag, str(rmsd_value)) + conf.get_extra_data()[_SDM.KEY_MATCHED][idx].get_molecule().SetProp( + combined_tag, str(rmsd_value) + ) + + def execute(self): + # this assumes that the conformers that are to be matched for the calculation of the RMSD matrix, are attached + # as a list in a generic data field with a specified key + conformers = unroll_conformers(compounds=self.get_compounds()) + self._calculate_RMSD(conformers=conformers) + self._logger.log( + f"Annotated {len(conformers)} conformers with RMSD values (tag: {_SR.RMSD_TAG}).", + _LE.INFO, + ) + + # TODO: add a nice pandas DF with the RMSD values to a generic data field diff --git a/icolos/core/workflow_steps/calculation/shaep.py b/icolos/core/workflow_steps/calculation/shaep.py new file mode 100644 index 0000000..267e4bb --- /dev/null +++ b/icolos/core/workflow_steps/calculation/shaep.py @@ -0,0 +1,77 @@ +from icolos.utils.execute_external.execute import Executor +from icolos.core.workflow_steps.calculation.base import StepCalculationBase +from icolos.core.workflow_steps.step import _LE +from icolos.utils.enums.step_enums import StepShaepEnum +from icolos.utils.enums.program_parameters import PantherEnum, ShaepEnum +from icolos.core.containers.compound import Conformer +import tempfile +from pydantic import BaseModel +import os + +_SSE = StepShaepEnum() +_SE = ShaepEnum() +_PE = PantherEnum() + + +class StepShaep(StepCalculationBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=Executor) + + def _prepare_tmp_input_dir(self): + tmp_dir = tempfile.mkdtemp() + return tmp_dir + + def _execute_backend(self, conf_path: str, tmp_dir: str, ni_path: str): + arguments = [ + os.path.join(tmp_dir, ni_path), + conf_path, + os.path.join(tmp_dir, _SE.OUTPUT_SIMILARITY), + ] + self._backend_executor.execute( + command=_SE.SHAEP_EXECUTABLE, arguments=arguments, check=True + ) + + def _parse_output(self, tmp_dir: str, conformer: Conformer): + with open(os.path.join(tmp_dir, _SE.OUTPUT_SIMILARITY), "r") as f: + # TODO: add support for multiple input structures; ignore the names (all will be in one 
+            # TODO: add support for multiple input structures; ignore the names (all will be in one
+            # line), but from position 8 (index 7 in python) onwards, the shape and esp similarities
+            # are reported in the same order as the input, i.e. <7 other values> mol1_shape mol1_esp mol2_shape ...
+            parts = f.readlines()[1].split("\t")
+            conformer.get_molecule().SetProp(_SE.TAG_SHAPE_SIMILARITY, str(parts[7]))
+            conformer.get_molecule().SetProp(_SE.TAG_ESP_SIMILARITY, str(parts[8]))
+
+    def execute(self):
+        number_rescored = 0
+        for compound in self.get_compounds():
+            for enumeration in compound.get_enumerations():
+                if len(enumeration.get_conformers()) == 0:
+                    self._logger.log(
+                        f"Found no conformers for enumeration {enumeration} for compound {compound}.",
+                        _LE.WARNING,
+                    )
+                    # we can still execute ShaEP at the enumeration level; if the compounds are
+                    # correctly annotated they should be written out correctly, although this will
+                    # be slower
+                    # easiest for now is to add the enumeration mol object as a single conformer
+                    # and run that through ShaEP
+                    mol = enumeration.get_molecule()
+                    conf = Conformer(conformer=mol)
+                    enumeration.add_conformer(conf)
+
+                # TODO: ShaEP allows batch execution for any number of compounds (parsing gets more
+                # difficult though). Implement that to avoid overhead from file system issues.
+                # TODO: Refactor and add comments
+                for conformer in enumeration.get_conformers():
+                    tmp_dir = self._prepare_tmp_input_dir()
+                    conf_path = os.path.join(tmp_dir, _SE.CONFORMER_PATH)
+                    ni_file = self.data.generic.get_files_by_extension("mol2")[0]
+                    ni_file.write(tmp_dir)
+                    conformer.write(conf_path)
+                    self._execute_backend(conf_path, tmp_dir, ni_file.get_file_name())
+                    self._parse_output(tmp_dir, conformer)
+                    self._logger.log(
+                        f"Finished ShaEP execution for conformer {conformer.get_index_string()}.",
+                        _LE.DEBUG,
+                    )
+                    number_rescored += 1
+                    self._remove_temporary(tmp_dir)
+        self._logger.log(f"Executed ShaEP for {number_rescored} conformers.", _LE.INFO)
diff --git a/icolos/core/workflow_steps/calculation/turbomole.py b/icolos/core/workflow_steps/calculation/turbomole.py
new file mode 100644
index 0000000..0ff9d7e
--- /dev/null
+++ b/icolos/core/workflow_steps/calculation/turbomole.py
@@ -0,0 +1,440 @@
+import os
+import tempfile
+from typing import Tuple, List
+from copy import deepcopy
+
+from pydantic import BaseModel
+
+from icolos.utils.enums.step_enums import StepTurbomoleEnum
+from icolos.utils.execute_external.execute import execution_successful
+from icolos.utils.execute_external.openbabel import OpenBabelExecutor
+from icolos.utils.execute_external.turbomole import TurbomoleExecutor
+from icolos.utils.general.convenience_functions import nested_get
+
+from icolos.utils.general.molecules import get_charge_for_molecule
+
+from icolos.core.containers.compound import Conformer, Enumeration
+
+from icolos.utils.enums.program_parameters import OpenBabelEnum
+from icolos.utils.enums.program_parameters import TurbomoleEnum
+from icolos.utils.enums.compound_enums import ConformerContainerEnum
+from icolos.core.workflow_steps.calculation.base import StepCalculationBase
+from icolos.core.workflow_steps.step import _LE
+from icolos.loggers.logger_utils import log_multiline_string
+from icolos.utils.general.files_paths import _FG, check_file_availability, gen_tmp_file
+
+from icolos.utils.general.parallelization import Parallelizer, SubtaskContainer
+
+_OE = OpenBabelEnum()
+_EE = TurbomoleEnum()
+_COE = ConformerContainerEnum()
+_STE = StepTurbomoleEnum()
+
+
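+# Rough shape of the conversion pipeline implemented by this step (a sketch for
+# orientation only; file names are illustrative and the exact flags live in the enums):
+#   obabel -isdf tmp.sdf -oxyz -Otmp.xyz       # SDF -> XYZ
+#   x2t tmp.xyz > coord                        # XYZ -> Turbomole "coord" file
+#   define < config.tm && cosmoprep < cosmo.config
+#   ridft                                      # or the configured execution mode
+#   t2x -c > final.xyz                         # final snapshot back to XYZ
+#   obabel -ixyz final.xyz -osdf -Ofinal.sdf   # XYZ -> SDF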
+class StepTurbomole(StepCalculationBase, BaseModel):
+    """Step that executes Turbomole.
+
+    Note that the execution (especially in conjunction with a subsequent cosmo step) is relatively complex:
+    (1) Write the conformer as an SDF file to a temporary directory,
+    (2) use obabel to translate it to an XYZ file,
+    (3) use x2t to make a coord file out of it (input for turbomole; is updated during geometry optimization),
+    (4) execute Turbomole, generating (i) a final coord file and (ii) a trajectory (if specified),
+    (5) use t2x to extract the final "snapshot" as an XYZ file and translate it to SDF, and
+    (6) update the coordinates and tags in the conformers.
+
+    IMPORTANT: Keep the "mol.cosmo" file attached to the conformer as additional data for a possible cosmo step."""
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        # initialize the executor and test availability
+        self._initialize_backend(executor=TurbomoleExecutor)
+        # TODO: figure out why "module load turbomole/73 && ridft" sometimes fails (see also below) and
+        # use strict=True after fix; probably, it has to do with $TURBOTMPDIR (all parallel jobs access the same)
+        self._check_backend_availability(strict=False)
+
+    def get_original_conformer(self, conformer) -> Conformer:
+        for compound in self.get_compounds():
+            for enum in compound.get_enumerations():
+                if (
+                    enum._enumeration_id
+                    == conformer.get_enumeration_object().get_enumeration_id()
+                ):
+                    for conf in enum.get_conformers():
+                        if conf._conformer_id == conformer._conformer_id:
+                            return conf
+
+    def _prepare_tmp_input_directories(
+        self, batch: List
+    ) -> Tuple[List, List[str], List[str], List[str], List[str], List[str], List[str]]:
+        conformers = []
+        tmp_dirs = []
+        paths_input_sdf = []
+        paths_input_xyz = []
+        paths_coord = []
+        paths_tm_config = []
+        paths_cosmo_config = []
+        for sublist in batch:
+            for element in sublist:  # there is only one
+                conformer = element.data
+                conformers.append(conformer)
+                # 1) generate all temporary paths
+                tmp_dir = tempfile.mkdtemp()
+                _, path_input_sdf = gen_tmp_file(
+                    prefix="tmp_", suffix=".sdf", dir=tmp_dir
+                )
+                _, path_input_xyz = gen_tmp_file(
+                    prefix="tmp_", suffix=".xyz", dir=tmp_dir
+                )
+                path_coord = os.path.join(tmp_dir, _EE.COORD)
+
+                # 2) write-out the conformers for an enumeration in an SDF file
+                conformer.write(path=path_input_sdf)
+
+                # 3) translate the SDF into an XYZ file (using OpenBabel)
+                # Note that all tags are lost here (but the names are not!)
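+                # (An XYZ file holds only an atom count, a title line and element/coordinate
+                # rows; the snippet below is purely illustrative:
+                #     3
+                #     tmp_abc123
+                #     O  0.000  0.000  0.117
+                #     H  0.000  0.755 -0.471
+                #     H  0.000 -0.755 -0.471
+                # so SD tags cannot survive this conversion, only the title line does.)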
+ obabel_executor = OpenBabelExecutor() + obabel_executor.execute( + command=_OE.OBABEL, + arguments=[ + _OE.OBABEL_INPUTFORMAT_SDF, + path_input_sdf, + _OE.OBABEL_OUTPUTFORMAT_XYZ, + "".join([_OE.OBABEL_O, path_input_xyz]), + ], + check=True, + location=tmp_dir, + ) + + # 4) translate the XYZ to a TM input file ("coord"); "x2t" writes to stdout + result = self._backend_executor.execute( + command=_EE.TM_X2T, arguments=[path_input_xyz], check=True + ) + with open(path_coord, "w") as file: + file.write(result.stdout) + + # 5) add paths + tmp_dirs.append(tmp_dir) + paths_input_sdf.append(path_input_sdf) + paths_input_xyz.append(path_input_xyz) + paths_coord.append(path_coord) + + tm_path, cosmo_path = self._get_config_paths(conformer) + paths_tm_config.append(tm_path) + paths_cosmo_config.append(cosmo_path) + + return ( + conformers, + tmp_dirs, + paths_input_sdf, + paths_input_xyz, + paths_coord, + paths_tm_config, + paths_cosmo_config, + ) + + def _get_config_paths(self, conformer: Conformer) -> Tuple[str, str]: + try: + config_dir = self.settings.additional[_EE.TM_CONFIG_DIR] + config_basename = self.settings.additional[_EE.TM_CONFIG_BASENAME] + path_cosmo_config = self.settings.additional[_EE.TM_CONFIG_COSMO] + except KeyError as e: + raise KeyError("The dir, basename and cosmo paths need to be set.") from e + + charge = str( + get_charge_for_molecule( + molecule=conformer._enumeration_object.get_molecule() + ) + ) + + # the path would look like: /opt/Icolos/turbomole_config/b97-3c-ri-d3-def2-mtzvp-int-nosym-charge-1.tm + path_tm_config = os.path.join( + config_dir, "".join([config_basename, charge, _EE.TM_CONFIG_ENDING]) + ) + return path_tm_config, path_cosmo_config + + def _execute_define(self, tmp_dir, path_tm_config: str): + result = self._backend_executor.execute( + command=_EE.TM_DEFINE, + arguments=[" ".join(["<", path_tm_config])], + check=True, + location=tmp_dir, + ) + + if not execution_successful(result.stderr, _EE.TM_DEFINE_SUCCESS_STRING): + self._logger.log( + f"Execution of {_EE.TM_DEFINE} failed for file {path_tm_config}. Error message:", + _LE.ERROR, + ) + log_multiline_string( + logger=self._logger_blank, + level=_LE.ERROR, + multi_line_string=result.stdout, + ) + + def _execute_cosmoprep(self, tmp_dir, path_cosmo_config: str): + result = self._backend_executor.execute( + command=_EE.TM_COSMOPREP, + arguments=[" ".join(["<", path_cosmo_config])], + check=True, + location=tmp_dir, + ) + + if not execution_successful(result.stderr, _EE.TM_COSMOPREP_SUCCESS_STRING): + self._logger.log( + f"Execution of {_EE.TM_COSMOPREP} failed for file {path_cosmo_config}. 
Error message:", + _LE.ERROR, + ) + log_multiline_string( + logger=self._logger_blank, + level=_LE.ERROR, + multi_line_string=result.stdout, + ) + + def _manipulate_control_script(self, path: str): + # do the following changes to the "control" script in order to generate FINE Cosmo files + with open(path, "r") as f: + control = f.readlines() + new_control = [] + for line in control: + if line.rstrip("\n") != _EE.CONTROL_COSMO_OUT: + new_control.append(line) + else: + new_control.append("".join([_EE.CONTROL_COSMO_REPLACE, "\n"])) + + # only add this line in case there is no optimization run going on + if ( + nested_get( + self.settings.additional, + [_STE.EXECUTION_MODE], + default=_EE.TM_RIDFT, + ) + == _EE.TM_RIDFT + ): + new_control.append("".join([_EE.CONTROL_COSMO_INSERTION, "\n"])) + with open(path, "w") as f: + f.writelines(new_control) + + def _get_arguments(self) -> list: + arguments = [] + + # add flags + for flag in self.settings.arguments.flags: + arguments.append(flag) + + # flatten the dictionary into a list for command-line execution + for key in self.settings.arguments.parameters.keys(): + arguments.append(key) + arguments.append(self.settings.arguments.parameters[key]) + return arguments + + def _execute_run(self, tmp_dir): + execution_mode = nested_get( + self.settings.additional, [_STE.EXECUTION_MODE], default=_EE.TM_RIDFT + ) + result = self._backend_executor.execute( + command=execution_mode, + arguments=self._get_arguments(), + check=True, + location=tmp_dir, + ) + + if ( + not execution_successful(result.stderr, _EE.TM_RIDFT_SUCCESS_STRING) + or result.returncode != 0 + ): + self._logger.log( + f"Execution of {execution_mode} failed (return code: {result.returncode}). Error message (stdout & stderr):", + _LE.DEBUG, + ) + log_multiline_string( + logger=self._logger_blank, + level=_LE.DEBUG, + multi_line_string=result.stdout, + ) + log_multiline_string( + logger=self._logger_blank, + level=_LE.DEBUG, + multi_line_string=result.stderr, + ) + return result.returncode + + def _coord2sdf(self, tmp_dir, path_output_xyz: str, path_output_sdf: str): + # extract the latest snapshot and write it as an XYZ file + result = self._backend_executor.execute( + command=_EE.TM_T2X, arguments=[_EE.TM_T2X_C], check=True, location=tmp_dir + ) + + with open(path_output_xyz, "w") as file: + file.write(result.stdout) + + # translate it to an SDF + obabel_executor = OpenBabelExecutor() + obabel_executor.execute( + command=_OE.OBABEL, + arguments=[ + _OE.OBABEL_INPUTFORMAT_XYZ, + path_output_xyz, + _OE.OBABEL_OUTPUT_FORMAT_SDF, + "".join([_OE.OBABEL_O, path_output_sdf]), + ], + check=True, + ) + + def _parse_output(self, tmp_dirs: List[str], conformers: List[Conformer]): + results = [] + # load and attach "mol.cosmo" file + for tmp_dir, conformer in zip(tmp_dirs, conformers): + result = _STE.SUCCESS + cosmo_path = os.path.join(tmp_dir, _EE.TM_OUTPUT_COSMOFILE) + if check_file_availability(path=cosmo_path) != _FG.NOT_GENERATED: + with open(cosmo_path, "r") as f: + file_content = f.readlines() + conf = self.get_original_conformer(conformer) + conf.add_extra_data(key=_COE.EXTRA_DATA_COSMOFILE, data=file_content) + # conformer.add_extra_data(key=_COE.EXTRA_DATA_COSMOFILE, data=file_content) + + else: + self._logger.log( + f"Could not load cosmo file for {conformer.get_index_string()}, will remove conformer.", + _LE.WARNING, + ) + self._logger.log( + f"File {cosmo_path} could not be loaded for {conformer.get_index_string()}.", + _LE.DEBUG, + ) + result = _STE.FAILED + + # set molecule to None 
removes the 3D coordinates -> will be deleted in the end + conformer.set_molecule(None) + + # load and attach "coord" file + coord_file = os.path.join(tmp_dir, _EE.TM_OUTPUT_COORDFILE) + coord_file_status = check_file_availability(path=coord_file) + if coord_file_status == _FG.NOT_GENERATED: + self._logger.log( + f"File {coord_file} could not be loaded for {conformer.get_index_string()}.", + _LE.DEBUG, + ) + result = _STE.FAILED + elif coord_file_status == _FG.GENERATED_EMPTY: + self._logger.log( + f"File {coord_file} is empty for {conformer.get_index_string()}.", + _LE.DEBUG, + ) + result = _STE.FAILED + elif coord_file_status == _FG.GENERATED_SUCCESS: + with open(coord_file, "r") as f: + file_content = f.readlines() + conf = self.get_original_conformer(conformer) + conf.add_extra_data( + key=_COE.EXTRA_DATA_COORDFILE, data=file_content + ) + + execution_mode = nested_get( + self.settings.additional, + [_STE.EXECUTION_MODE], + default=_EE.TM_RIDFT, + ) + + # for RIDFT, only the cosmo file is required as coordinates are not updated (no geometry optimization) + if execution_mode != _EE.TM_RIDFT: + path_output_xyz = os.path.join(tmp_dir, _EE.TM_OUTPUT_FINAL_XYZ) + path_output_sdf = os.path.join(tmp_dir, _EE.TM_OUTPUT_FINAL_SDF) + self._coord2sdf(tmp_dir, path_output_xyz, path_output_sdf) + conf = self.get_original_conformer(conformer) + conf.update_coordinates(path=path_output_sdf) + results.append(result) + return results + + def _clean_failed_conformers(self, enumeration: Enumeration) -> Tuple[int, int]: + n_conformers_before = len(enumeration.get_conformers()) + enumeration.clean_failed_conformers() + n_conformers_after = len(enumeration.get_conformers()) + return n_conformers_before, n_conformers_after + + def _run_conformer( + self, + conformer: Conformer, + tmp_dir: str, + path_tm_config: str, + path_cosmo_config: str, + ) -> None: + self._execute_define(tmp_dir=tmp_dir, path_tm_config=path_tm_config) + # execute COSMOprep (update "control") + self._execute_cosmoprep(tmp_dir=tmp_dir, path_cosmo_config=path_cosmo_config) + # set a necessary environment variable + os.environ[_EE.TM_TURBOTMPDIR] = tmp_dir + # update the "control" file + self._manipulate_control_script(path=os.path.join(tmp_dir, _EE.CONTROL)) + # all ready; start the execution + self._execute_run(tmp_dir) + + self._logger.log( + f"Finished Turbomole execution for conformer {conformer.get_index_string()} in directory {tmp_dir}.", + _LE.DEBUG, + ) + + def _execute_turbomole_parallel(self): + parallelizer = Parallelizer(func=self._run_conformer) + n = 1 + + while self._subtask_container.done() is False: + + next_batch = self._get_sublists( + get_first_n_lists=self._get_number_cores() + ) # return n lists of length max_sublist_length + _ = [sub.increment_tries() for element in next_batch for sub in element] + _ = [sub.set_status_failed() for element in next_batch for sub in element] + + ( + conformers, + tmp_dirs, + paths_input_sdf, + paths_input_xyz, + paths_coord, + paths_tm_configs, + paths_cosmo_configs, + ) = self._prepare_tmp_input_directories(next_batch) + + self._logger.log( + f"Executing Turbomole for batch {n} containing {len(tmp_dirs)} conformers", + _LE.INFO, + ) + + parallelizer.execute_parallel( + conformer=conformers, + tmp_dir=tmp_dirs, + path_tm_config=paths_tm_configs, + path_cosmo_config=paths_cosmo_configs, + ) + + results = self._parse_output(tmp_dirs, conformers) + + for sublist, result in zip(next_batch, results): + # TODO: this only works if max length sublist == 1, fine for now as that is all turbomole 
can handle + for task in sublist: + if result == _STE.SUCCESS: + task.set_status_success() + else: + task.set_status_failed() + self._remove_temporary(tmp_dirs) + n += 1 + + def execute(self): + all_conformers = [] + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if self._input_object_empty(enumeration): + continue + for conformer in enumeration.get_conformers(): + # for efficient parallelisation, unroll all conformers + conf = deepcopy(conformer) + all_conformers.append(conf) + + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(all_conformers) + self._execute_turbomole_parallel() diff --git a/icolos/core/workflow_steps/cavity_explorer/__init__.py b/icolos/core/workflow_steps/cavity_explorer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/cavity_explorer/base.py b/icolos/core/workflow_steps/cavity_explorer/base.py new file mode 100644 index 0000000..0ed37f8 --- /dev/null +++ b/icolos/core/workflow_steps/cavity_explorer/base.py @@ -0,0 +1,69 @@ +from pydantic import BaseModel +from icolos.core.workflow_steps.step import StepBase +from typing import List +from icolos.utils.enums.step_enums import StepCavExploreEnum + +_SFP = StepCavExploreEnum() + + +class StepCavityExplorerBase(StepBase, BaseModel): + eps: float = None + iso_value: int = None + threshold: float = None + min_samples: int = None + format_: str = None + + def __init__(self, **data): + super().__init__(**data) + + def _write_input_files(self, tmp_dir): + # HM: this is the simplest implementation - we can think about whether we need any more complexity + for file in self.data.generic.get_flattened_files(): + file.write(tmp_dir) + + def _parse_arguments(self, flag_dict: dict, args: list = None) -> List: + arguments = args if args is not None else [] + # first add the settings from the command line + for key in self.settings.arguments.parameters.keys(): + arguments.append(key) + arguments.append(str(self.settings.arguments.parameters[key])) + for flag in self.settings.arguments.flags: + arguments.append(str(flag)) + for key, value in flag_dict.items(): + # only add defaults if they have not been specified in the json + if key not in arguments: + arguments.append(key) + arguments.append(value) + return arguments + + def _set_mdpocket_args(self): + if self.settings.additional is not None: + keys = self.settings.additional.keys() + + self.eps = self.settings.additional[_SFP.EPS] if _SFP.EPS in keys else 3 + self.iso_value = ( + self.settings.additional[_SFP.ISO_VALUE] + if _SFP.ISO_VALUE in keys + else 0.5 + ) + self.threshold = ( + self.settings.additional[_SFP.THRESHOLD] + if _SFP.THRESHOLD in keys + else 20.0 + ) + self.min_samples = ( + self.settings.additional[_SFP.MIN_SAMPLES] + if _SFP.MIN_SAMPLES in keys + else 25 + ) + if _SFP.TRAJ_TYPE in keys: + if self.settings.additional[_SFP.TRAJ_TYPE].lower() == "gromacs": + self.format_ = "xtc" + elif self.settings.additional[_SFP.TRAJ_TYPE].lower() == "desmond": + self.format_ = "dtr" + else: + raise ValueError( + "Only Desmond and GROMACS trajectory types are supported" + ) + else: + raise ValueError("Trajectory format was not specified!") diff --git a/icolos/core/workflow_steps/cavity_explorer/mdpocket.py b/icolos/core/workflow_steps/cavity_explorer/mdpocket.py new file mode 100644 index 0000000..0d4eada --- /dev/null +++ 
b/icolos/core/workflow_steps/cavity_explorer/mdpocket.py
@@ -0,0 +1,306 @@
+from icolos.utils.general.parallelization import Parallelizer, SubtaskContainer
+from pydantic import BaseModel
+from icolos.core.workflow_steps.cavity_explorer.base import StepCavityExplorerBase
+from icolos.utils.enums.step_enums import StepCavExploreEnum
+from icolos.utils.execute_external.execute import Executor
+from icolos.core.workflow_steps.step import _LE
+from sklearn.cluster import DBSCAN
+from collections import Counter
+import numpy as np
+import re
+import os
+
+_SFP = StepCavExploreEnum()
+
+
+class StepMDpocket(StepCavityExplorerBase, BaseModel):
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        # self._initialize_backend(executor=MPIExecutor)
+        self._initialize_backend(executor=Executor)
+
+        # set max_length_sublists to 1
+        self.execution.parallelization.max_length_sublists = 1
+
+    def _create_density_grid_file(self, tmp_dir: str, iso_value: float):
+        """Creates a density grid from the .dx file into a .pdb file; heavily influenced by the
+        extractISOPdb.py script provided by fpocket."""
+        density_file = [
+            file for file in os.listdir(tmp_dir) if file.endswith("dens_grid.dx")
+        ]
+        assert len(density_file) == 1
+        density_file = density_file[0]
+
+        outfile = os.path.join(tmp_dir, f"iso{iso_value}.pdb")
+
+        with open(os.path.join(tmp_dir, density_file), "r") as f:
+            # read the free-form header lines until the first "object" line - here is an example:
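+            # (illustrative OpenDX layout, the grid dimensions are arbitrary here:
+            #   object 1 class gridpositions counts 80 90 100
+            #   origin 10.000 12.500 -3.000
+            #   delta 0.500 0.000 0.000
+            #   delta 0.000 0.500 0.000
+            #   delta 0.000 0.000 0.500
+            #   object 2 class gridconnections counts 80 90 100
+            #   object 3 class array type double rank 0 items 720000 data follows
+            # the parsing below picks the counts, origin, deltas and item count from
+            # exactly these lines)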
+            header = ""
+            tmp = f.readline()
+            while tmp[0] != "o":
+                header = header + tmp
+                tmp = f.readline()
+
+            # read the grid size
+            r = re.compile(r"\w+")
+            gsize = r.findall(tmp)
+            gsize = [int(gsize[-3]), int(gsize[-2]), int(gsize[-1])]
+
+            # read the origin of the system
+            line = f.readline().split()
+            origin = [float(line[-3]), float(line[-2]), float(line[-1])]
+
+            # read grid space
+            line = f.readline().split()
+            deltax = [float(line[-3]), float(line[-2]), float(line[-1])]
+            line = f.readline().split()
+            deltay = [float(line[-3]), float(line[-2]), float(line[-1])]
+            line = f.readline().split()
+            deltaz = [float(line[-3]), float(line[-2]), float(line[-1])]
+
+            # pay attention here, this assumes an orthogonal, normalized space, but normally it should be ok
+            delta = np.array([deltax[0], deltay[1], deltaz[2]])
+
+            # read the number of data points
+            f.readline()
+            r = re.compile(r"\d+")
+            n_entries = int(r.findall(f.readline())[2])
+
+            if n_entries != gsize[0] * gsize[1] * gsize[2]:
+                raise AssertionError(
+                    "Error reading the file. The number of expected data points does not correspond to the number of "
+                    "labeled data points in the header."
+                )
+            # initiate the x/y/z counters for reading the grid data
+            z = 0
+            y = 0
+            x = 0
+
+            self._logger.log("Reading grid file...", _LE.DEBUG)
+
+            with open(outfile, "w") as f_out:
+                counter = 1
+                for _ in range(n_entries // 3):
+                    c = f.readline().split()
+                    if len(c) != 3:
+                        self._logger.log("Error reading grid data", _LE.ERROR)
+                        raise AssertionError
+                    for i in range(3):
+                        if (0 > iso_value > float(c[i])) or (
+                            0 < iso_value < float(c[i])
+                        ):
+                            # f_out.write(f"ATOM {counter} C PTH 1 {origin[0] + float(x) * delta[0]} {origin[1] + float(y) * delta[1]} {origin[2] + float(z) * delta[2]} 0.00 0.00\n")
+                            f_out.write(
+                                "ATOM  %5d  C   PTH     1    %8.3f%8.3f%8.3f%6.2f%6.2f\n"
+                                % (
+                                    counter,
+                                    origin[0] + float(x) * delta[0],
+                                    origin[1] + float(y) * delta[1],
+                                    origin[2] + float(z) * delta[2],
+                                    0.0,
+                                    0.0,
+                                )
+                            )
+                            counter += 1
+                        z += 1
+                        if z >= gsize[2]:
+                            z = 0
+                            y += 1
+                            if y >= gsize[1]:
+                                y = 0
+                                x += 1
+
+        self._logger.log(f"Finished writing {outfile}", _LE.DEBUG)
+
+    def _cluster_pockets(self, tmp_dir, eps, min_samples, threshold, iso_value):
+        """
+        Clusters points from the initial MDpocket density grid at a given iso value.
+        """
+        iso_file = os.path.join(tmp_dir, f"iso{iso_value}.pdb")
+        with open(iso_file, "r") as f:
+            # collect the data from the pdb file (x, y, z coordinates, keyed by atom serial)
+            data = {
+                (line[5:11].strip()): (
+                    line[30:38].strip(),
+                    line[38:46].strip(),
+                    line[46:54].strip(),
+                )
+                for line in f.readlines()
+            }
+        db = DBSCAN(eps=eps, min_samples=min_samples).fit(
+            np.array(list(data.values())).astype(np.float64)
+        )
+
+        labels = db.labels_
+        data_ = np.array(list(data.values())).astype(np.float64)
+        db.fit_predict(data_)
+
+        self._logger.log(
+            f"Number of clusters found for eps = {eps}, iso = {iso_value}, min_samples = {min_samples} and threshold = {threshold} is: {len(set(db.labels_))}",
+            _LE.DEBUG,
+        )
+
+        pockets_report = Counter(db.labels_)
+        filtered_pockets = []
+        filtered_data = {}
+        filtered_labels = []
+
+        # keep only the pockets with more than `threshold` points (label -1 is DBSCAN noise)
+        for k, v in pockets_report.items():
+            if v > self.threshold and k >= 0:
+                filtered_pockets.append(k)
+
+        # get the keys and labels for each data point
+        res = list(zip(list(data.keys()), labels))
+
+        # get lists with the data and labels for the filtered pockets
+        for pocket in filtered_pockets:
+            for (index, label) in res:
+                if label == pocket:
+                    filtered_data[index] = data.get(index)
+                    filtered_labels.append(label)
+
+        self._logger.log(
+            f"PocketIDs having more than {self.threshold} points are: {filtered_pockets}",
+            _LE.DEBUG,
+        )
+        self._logger.log(
+            f"The number of filtered pockets is: {len(filtered_pockets)}", _LE.DEBUG
+        )
+        return data, labels, filtered_data, filtered_labels, pockets_report
+
+    def _save_pocket_files(self, tmp_dir, data, labels):
+        """Saves the individual pockets as separate pdb files to be used with mdpocket."""
+        iso_file = os.path.join(tmp_dir, f"iso{self.iso_value}.pdb")
+
+        # define labels and indices
+        res = list(zip(list(data.keys()), labels))
+        with open(iso_file, "r") as f:
+            original_lines = f.readlines()
+        # collect the labels of all pockets, excluding outliers (label -1)
+        indices = list(set([l for l in labels if l >= 0]))
+
+        # save the pocket pdbs - these are passed with the --selected_pocket arg later
+        for label in indices:
+            with open(os.path.join(tmp_dir, f"pocket_{label}.pdb"), "w") as f:
+                for (index, lab) in res:
+                    if lab == label:
+                        f.write(original_lines[int(index) - 1])
+
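+    # The per-pocket rerun below amounts to a call roughly like (paths illustrative,
+    # assuming a GROMACS trajectory; the real command string comes from _SFP.MDPOCKET_COMMAND):
+    #   mdpocket --trajectory_file traj.xtc --trajectory_format xtc \
+    #            --selected_pocket pocket_0.pdb -f structure.pdb -o pocket_0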
+    def _run_mdpocket_selected_pocket(self, tmp_dir):
+        """Runs the second mdpocket pass (fpocket 3) on each selected pocket."""
+        pocket_files = [
+            file
+            for file in os.listdir(tmp_dir)
+            if file.endswith(".pdb") and "pocket_" in file
+        ]
+        argument_dicts = []
+        for file in pocket_files:
+            arguments = self._parse_arguments(
+                flag_dict={
+                    "--trajectory_file": os.path.join(
+                        tmp_dir,
+                        self.data.generic.get_argument_by_extension(self.format_),
+                    ),
+                    "--trajectory_format": self.format_,
+                    "--selected_pocket": os.path.join(tmp_dir, file),
+                    "-f": self.data.generic.get_argument_by_extension("pdb"),
+                    "-o": file.split(".")[0],
+                }
+            )
+            argument_dicts.append(arguments)
+
+        fpocket_parallelizer = Parallelizer(func=self._execute_mdpocket)
+        self._subtask_container = SubtaskContainer(
+            max_tries=self.execution.failure_policy.n_tries
+        )
+        self._subtask_container.load_data(argument_dicts)
+
+        while self._subtask_container.done() is False:
+            next_batch = self._get_sublists(get_first_n_lists=self._get_number_cores())
+
+            batch_dirs, batch_args = self._prepare_batch_inputs(next_batch, tmp_dir)
+
+            fpocket_parallelizer.execute_parallel(
+                tmp_dir=batch_dirs, arguments=batch_args
+            )
+            _ = [sub.increment_tries() for element in next_batch for sub in element]
+            _ = [sub.set_status_failed() for element in next_batch for sub in element]
+
+    def _prepare_batch_inputs(self, batch, tmp_dir):
+        tmp_dirs = []
+        args = []
+        for next_subtask_list in batch:
+            tmp_dirs.append(tmp_dir)
+            for (
+                subtask
+            ) in (
+                next_subtask_list
+            ):  # enforced as only one task per subtask, otherwise it makes no sense
+                args.append(subtask.data)  # append the arguments list
+        return tmp_dirs, args
+
+    def _execute_mdpocket(self, tmp_dir, arguments):
+
+        self._backend_executor.execute(
+            command=_SFP.MDPOCKET_COMMAND,
+            arguments=arguments,
+            location=tmp_dir,
+            check=True,
+        )
+
+    def execute(self):
+
+        tmp_dir = self._make_tmpdir()
+        self._write_input_files(tmp_dir)
+        # set some constants from the arguments
+        self._set_mdpocket_args()
+
+        # execute the initial mdpocket job (without a specific pocket) to produce the .dx file
+        mdpocket_run1_args = self._parse_arguments(
+            flag_dict={
+                "--trajectory_file": os.path.join(
+                    tmp_dir, self.data.generic.get_argument_by_extension(self.format_)
+                ),
+                "--trajectory_format": self.format_,
+                "-f": os.path.join(
+                    tmp_dir, self.data.generic.get_argument_by_extension("pdb")
+                ),
+            }
+        )
+
+        # run the first command, producing the dx file and a bunch of pocket_n.pdb pocket topology files
+        self._execute_mdpocket(tmp_dir, mdpocket_run1_args)
+
+        # take the produced dx file and create the density grid in pdb format
+        self._create_density_grid_file(tmp_dir, iso_value=self.iso_value)
+
+        # cluster the grid points (only data and labels are needed downstream)
+        data, labels, _, _, _ = self._cluster_pockets(
+            tmp_dir=tmp_dir,
+            eps=self.eps,
+            min_samples=self.min_samples,
+            threshold=self.threshold,
+            iso_value=self.iso_value,
+        )
+
+        # produces a load of pocket_n.pdb files based on the clusters identified by DBSCAN
+        self._save_pocket_files(tmp_dir, data, labels)
+        # run mdpocket a second time with a specified pocket to produce a pocket parameter file;
+        # this is done for each individual pocket, in parallel
+        # check whether the descriptors flag has been set
+        # if _SFP.DESCRIPTORS in self.settings.additional.keys() and self.settings.additional[_SFP.DESCRIPTORS]:
+        self._run_mdpocket_selected_pocket(tmp_dir)
+
+        # save what's in the tmpdir, then remove it
+        self._parse_output(tmp_dir)
+        self._logger.log(
+            f"Completed execution for {self.step_id} 
successfully", _LE.INFO + ) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/confgen/__init__.py b/icolos/core/workflow_steps/confgen/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/confgen/base.py b/icolos/core/workflow_steps/confgen/base.py new file mode 100644 index 0000000..0e440be --- /dev/null +++ b/icolos/core/workflow_steps/confgen/base.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel +from icolos.core.workflow_steps.step import StepBase + + +class StepConfgenBase(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) diff --git a/icolos/core/workflow_steps/confgen/crest.py b/icolos/core/workflow_steps/confgen/crest.py new file mode 100644 index 0000000..ba00946 --- /dev/null +++ b/icolos/core/workflow_steps/confgen/crest.py @@ -0,0 +1,121 @@ +import os +from typing import List + +from pydantic import BaseModel +from rdkit import Chem +from copy import deepcopy + +from icolos.utils.execute_external.crest import CrestExecutor + +from icolos.utils.general.molecules import get_charge_for_molecule + +from icolos.core.containers.compound import Enumeration, Conformer + +from icolos.utils.enums.program_parameters import CrestEnum, CrestOutputEnum +from icolos.core.workflow_steps.step import _LE, _CTE +from icolos.core.workflow_steps.confgen.base import StepConfgenBase + +_EE = CrestEnum() +_COE = CrestOutputEnum() + + +class StepCREST(StepConfgenBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + # initialize the executor and test availability + self._initialize_backend(executor=CrestExecutor) + self._check_backend_availability() + + def _get_energies_from_XYZ(self, path) -> list: + energies = [] + with open(path, "r") as f: + lines = f.readlines() + for line in lines: + if line.startswith(_COE.PREFIX_ENERGIES_XYZ): + energies.append(line.lstrip().rstrip()) + return energies + + def _parse_CREST_result( + self, dir_path: str, enumeration: Enumeration + ) -> List[Conformer]: + """Function to parse the result from CREST.""" + # CREST will output a variety of files to "dir_path" + conformers_sdf = os.path.join(dir_path, _COE.CREST_CONFORMERS_SDF) + conformers_xyz = os.path.join(dir_path, _COE.CREST_CONFORMERS_XYZ) + + # as the energies are lost in the SDF output, we will add them as a tag + energies = self._get_energies_from_XYZ(conformers_xyz) + charge = str( + get_charge_for_molecule(enumeration.get_molecule(), add_as_tag=False) + ) + mol_supplier = Chem.SDMolSupplier(conformers_sdf, removeHs=False) + result = [] + for mol_id, mol in enumerate(mol_supplier): + mol.SetProp(_CTE.CONFORMER_ENERGY_TAG, energies[mol_id]) + mol.SetProp(_CTE.FORMAL_CHARGE_TAG, charge) + result.append(Conformer(conformer=mol)) + return result + + def _set_formal_charge(self, parameters: dict, molecule: Chem.Mol) -> dict: + charge = get_charge_for_molecule(molecule, add_as_tag=False) + parameters[_EE.CREST_CHRG] = charge + self._logger.log(f"Set charge for molecule to {charge}.", _LE.DEBUG) + return parameters + + def _set_number_cores(self, parameters: dict) -> dict: + """Function for parallelization of task, setting the number of cores to be used.""" + parameters[_EE.CREST_T] = int(self.execution.parallelization.cores) + return parameters + + def _prepare_settings(self, tmp_dir: str, enumeration: Enumeration) -> list: + # first position is the input (SDF) file; the internal input at this stage is a molecule + # -> write it to a temporary SDF file (undocumented input functionality) and add 
the path
+        settings = [self._prepare_temp_input(tmp_dir, enumeration.get_molecule())]
+
+        # add flags
+        for flag in self.settings.arguments.flags:
+            settings.append(flag)
+
+        # add parameters
+        parameters = deepcopy(self.settings.arguments.parameters)
+
+        # update / over-write fields that need a specific value or are defined elsewhere
+        parameters = self._set_number_cores(parameters)
+        parameters = self._set_formal_charge(parameters, enumeration.get_molecule())
+
+        # flatten the dictionary into a list for command-line execution
+        for key in parameters.keys():
+            settings.append(key)
+            settings.append(parameters[key])
+        return settings
+
+    def execute(self):
+        for compound in self.get_compounds():
+            for enumeration in compound.get_enumerations():
+                if not self._input_object_valid(enumeration):
+                    continue
+
+                # set up
+                tmp_dir = self._move_to_temp_dir()
+
+                # the call to CREST starts with the path to the input file, followed by arguments and flags
+                settings = self._prepare_settings(tmp_dir, enumeration=enumeration)
+
+                self._logger.log(
+                    f"Executing CREST backend in folder {tmp_dir}.", _LE.DEBUG
+                )
+                result = self._backend_executor.execute(
+                    command=_EE.CREST, arguments=settings, check=False
+                )
+                self._restore_working_dir()
+
+                conformers = self._parse_CREST_result(tmp_dir, enumeration=enumeration)
+                enumeration.clear_conformers()
+                enumeration.add_conformers(conformers=conformers, auto_update=True)
+                self._logger.log(
+                    f"Executed CREST and obtained {len(conformers)} conformers for enumeration {enumeration.get_index_string()}",
+                    _LE.INFO,
+                )
+
+                self._remove_temporary(tmp_dir)
diff --git a/icolos/core/workflow_steps/confgen/omega.py b/icolos/core/workflow_steps/confgen/omega.py
new file mode 100644
index 0000000..b91646e
--- /dev/null
+++ b/icolos/core/workflow_steps/confgen/omega.py
@@ -0,0 +1,111 @@
+import os
+from typing import List
+
+from pydantic import BaseModel
+from rdkit import Chem
+from copy import deepcopy
+from icolos.utils.execute_external.omega import OMEGAExecutor
+from icolos.core.workflow_steps.step import _LE, _CTE
+from icolos.utils.general.molecules import get_charge_for_molecule
+
+from icolos.core.containers.compound import Enumeration, Conformer
+
+from icolos.utils.enums.program_parameters import OMEGAEnum, OMEGAOutputEnum
+from icolos.core.workflow_steps.confgen.base import StepConfgenBase
+
+_EE = OMEGAEnum()
+_COE = OMEGAOutputEnum()
+
+
+class StepOmega(StepConfgenBase, BaseModel):
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        # initialize the executor and test availability
+        self._initialize_backend(executor=OMEGAExecutor)
+        self._check_backend_availability()
+
+    def _parse_OMEGA_result(
+        self, dir_path: str, enumeration: Enumeration
+    ) -> List[Conformer]:
+        # OMEGA will output a variety of files to "dir_path"
+        conformers_sdf = os.path.join(dir_path, _COE.OUTPUT_SDF_NAME)
+
+        # energies are added as a tag in the output
+        mol_supplier = Chem.SDMolSupplier(conformers_sdf, removeHs=False)
+        charge = str(
+            get_charge_for_molecule(enumeration.get_molecule(), add_as_tag=False)
+        )
+        result = []
+        for mol_id, mol in enumerate(mol_supplier):
+            mol.SetProp(
+                _CTE.CONFORMER_ENERGY_TAG, mol.GetProp(_COE.CLASSIC_ENERGY_OUTPUT_TAG)
+            )
+            mol.ClearProp(_COE.CLASSIC_ENERGY_OUTPUT_TAG)
+            mol.SetProp(_CTE.FORMAL_CHARGE_TAG, charge)
+            conf = Conformer(conformer=mol)
+            result.append(conf)
+        return result
+
+    def _set_input_output_paths(self, parameters: dict, input_path: str) -> dict:
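+        # e.g. even if the user supplied their own input/output parameters in the JSON
+        # config, they are replaced here with the freshly written temporary SDF and the
+        # fixed output name (the actual flag names live in OMEGAEnum; this is only an
+        # illustration of the behaviour)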
+        # this is handled this way to overwrite any specifications from the user for the
+        # input / output paths as well
+        parameters[_EE.CLASSIC_INPUT] = input_path
+        parameters[_EE.CLASSIC_OUTPUT] = _COE.OUTPUT_SDF_NAME
+        return parameters
+
+    def _prepare_settings(self, tmp_dir: str, enumeration: Enumeration) -> list:
+        # the first argument is the mode of the binary "oeomega" (for now defaults to "classic")
+        settings = [_EE.OMEGA_MODE_CLASSIC]
+
+        # add flags
+        # make sure the energy tag is set as well
+        for flag in self.settings.arguments.flags:
+            settings.append(flag)
+        if _EE.CLASSIC_SDENERGY not in settings:
+            settings.append(_EE.CLASSIC_SDENERGY)
+
+        # add parameters
+        parameters = deepcopy(self.settings.arguments.parameters)
+
+        # update / over-write fields that need a specific value or are defined elsewhere
+        parameters = self._set_input_output_paths(
+            parameters=parameters,
+            input_path=self._prepare_temp_input(tmp_dir, enumeration.get_molecule()),
+        )
+
+        # flatten the dictionary into a list for command-line execution
+        for key in parameters.keys():
+            settings.append(key)
+            settings.append(parameters[key])
+        return settings
+
+    def execute(self):
+        for compound in self.get_compounds():
+            for enumeration in compound.get_enumerations():
+                if not self._input_object_valid(enumeration):
+                    continue
+
+                # set up
+                tmp_dir = self._move_to_temp_dir()
+                settings = self._prepare_settings(tmp_dir, enumeration=enumeration)
+
+                # execution
+                self._logger.log(
+                    f"Executing OMEGA backend in folder {tmp_dir}.", _LE.DEBUG
+                )
+                result = self._backend_executor.execute(
+                    command=_EE.OMEGA, arguments=settings, check=False
+                )
+                self._restore_working_dir()
+
+                # parsing
+                conformers = self._parse_OMEGA_result(tmp_dir, enumeration=enumeration)
+                enumeration.clear_conformers()
+                enumeration.add_conformers(conformers=conformers, auto_update=True)
+                self._logger.log(
+                    f"Completed OMEGA for enumeration {enumeration.get_index_string()}, added {len(conformers)} conformers.",
+                    _LE.INFO,
+                )
+
+                # clean-up
+                self._remove_temporary(tmp_dir)
diff --git a/icolos/core/workflow_steps/confgen/xtb.py b/icolos/core/workflow_steps/confgen/xtb.py
new file mode 100644
index 0000000..34a18a8
--- /dev/null
+++ b/icolos/core/workflow_steps/confgen/xtb.py
@@ -0,0 +1,170 @@
+import os
+from tempfile import mkdtemp
+
+from pydantic import BaseModel
+from rdkit import Chem
+from copy import deepcopy
+from typing import List, Tuple
+from icolos.utils.execute_external.xtb import XTBExecutor
+
+from icolos.utils.general.molecules import get_charge_for_molecule
+
+from icolos.core.containers.compound import Conformer
+
+from icolos.utils.enums.program_parameters import XTBEnum, XTBOutputEnum
+from icolos.core.workflow_steps.step import _LE, _CTE
+from icolos.core.workflow_steps.confgen.base import StepConfgenBase
+from icolos.utils.general.parallelization import SubtaskContainer, Parallelizer
+
+_EE = XTBEnum()
+_COE = XTBOutputEnum()
+
+
+class StepXTB(StepConfgenBase, BaseModel):
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        # initialize the executor and test availability
+        self._initialize_backend(executor=XTBExecutor)
+        self._check_backend_availability()
+
+    def _parse_XTB_result(self, tmp_dirs: List, conformers: List[Conformer]):
+        # XTB will output a variety of files to "dir_path"
+        results = []
+        for dir_path, conformer in zip(tmp_dirs, conformers):
+            optimized_conformer_sdf = os.path.join(dir_path, _COE.XTBOPT_SDF)
+            enum = conformer.get_enumeration_object()
+            # the energy is added by XTB as a tag in the output SDF, but we rename it to our own tag;
+            # note that XTB is called to operate on one conformer at a time (which we will return here)
+            mol_supplier = Chem.SDMolSupplier(optimized_conformer_sdf, removeHs=False)
+            mol = None
+            try:
+                for mol in mol_supplier:
+                    mol.SetProp(
+                        _CTE.CONFORMER_ENERGY_TAG, mol.GetProp(_COE.TOTAL_ENERGY_TAG)
+                    )
+                    mol.ClearProp(_COE.TOTAL_ENERGY_TAG)
+                    mol.SetProp(
+                        _CTE.FORMAL_CHARGE_TAG, str(get_charge_for_molecule(mol))
+                    )
+                    enum.add_conformer(Conformer(conformer=mol), auto_update=True)
+                    results.append(_COE.SUCCESS)
+
+            except Exception:
+                self._logger.log(
+                    f"Failed to parse XTB results for conformer {conformer.get_index_string()}",
+                    _LE.WARNING,
+                )
+                results.append(_COE.FAILURE)
+        return results
+
+    def _prepare_batch(self, batch) -> Tuple:
+        # first position is the input (SDF) file; the internal input at this stage is a list of molecules
+        # -> write it to a temporary SDF file (undocumented input functionality) and add the path
+
+        tmp_dirs = []
+        input_files = []
+        charges = []
+        conformers = []
+        for next_subtask_list in batch:
+            tmp_dir = mkdtemp()
+            tmp_dirs.append(tmp_dir)
+            for (
+                subtask
+            ) in (
+                next_subtask_list
+            ):  # enforced as one since xtb can't handle multiple files in one call
+                conformer = subtask.data
+                conformers.append(conformer)
+                input_file = self._prepare_temp_input(tmp_dir, conformer.get_molecule())
+                charge = get_charge_for_molecule(conformer.get_molecule())
+
+                charges.append(charge)
+                input_files.append(input_file)
+        return tmp_dirs, input_files, charges, conformers
+
+    def _prepare_arguments(self, settings: List) -> List:
+
+        # add flags
+        for flag in self.settings.arguments.flags:
+            settings.append(flag)
+
+        # add parameters
+        parameters = deepcopy(self.settings.arguments.parameters)
+
+        # flatten the dictionary into a list for command-line execution
+        for key in parameters.keys():
+            settings.append(key)
+            settings.append(parameters[key])
+        return settings
+
+    def _run_subjob(self, tmp_dir: str, input_file: str, charge: int) -> None:
+
+        work_dir = os.getcwd()
+        os.chdir(tmp_dir)
+
+        arguments = [input_file, _EE.XTB_P, charge]
+        arguments = self._prepare_arguments(
+            arguments
+        )  # add additional parameters from config
+
+        result = self._backend_executor.execute(
+            command=_EE.XTB, arguments=arguments, check=False
+        )
+        # for line in result.stdout.split("\n"):
+        #     self._logger_blank.log(line, _LE.DEBUG)
+        os.chdir(work_dir)
+
+    def _execute_xtb(self):
+        xtb_parallelizer = Parallelizer(func=self._run_subjob)
+        n = 1
+
+        tmp_dirs = None
+        while self._subtask_container.done() is False:
+
+            next_batch = self._get_sublists(get_first_n_lists=self._get_number_cores())
+            tmp_dirs, input_files, charges, conformers = self._prepare_batch(next_batch)
+
+            _ = [sub.increment_tries() for element in next_batch for sub in element]
+            _ = [sub.set_status_failed() for element in next_batch for sub in element]
+
+            self._logger.log(f"Executing XTB for batch {n}", _LE.DEBUG)
+
+            xtb_parallelizer.execute_parallel(
+                tmp_dir=tmp_dirs,
+                input_file=input_files,
+                charge=charges,
+            )
+
+            results = self._parse_XTB_result(tmp_dirs, conformers)
+            for sublist, result in zip(next_batch, results):
+                assert len(sublist) == 1
+                # TODO: this only works if max length sublist == 1, fine for now as that is all XTB can handle
+                for task in sublist:
+                    if result == _COE.SUCCESS:
+                        task.set_status_success()
+                    else:
+                        task.set_status_failed()
+
+            n += 1
+        self._remove_temporary(tmp_dirs)
+
+    def execute(self):
+        all_conformers = []
+        for compound in self.get_compounds():
+            for enumeration in compound.get_enumerations():
+                if enumeration.get_conformers():
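+                    # each conformer is unrolled into a flat list so the Parallelizer can
+                    # run one XTB process per conformer (max_length_sublists is forced to
+                    # 1 below)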
for conformer in enumeration.get_conformers(): + all_conformers.append(conformer) + enumeration.clear_conformers() + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(all_conformers) + self._execute_xtb() + self._logger.log( + f"Completed execution of XTB for {len(all_conformers)} conformers", + _LE.DEBUG, + ) diff --git a/icolos/core/workflow_steps/gromacs/__init__.py b/icolos/core/workflow_steps/gromacs/__init__.py new file mode 100644 index 0000000..16884f7 --- /dev/null +++ b/icolos/core/workflow_steps/gromacs/__init__.py @@ -0,0 +1,10 @@ +from icolos.core.workflow_steps.gromacs.cluster import StepGMXCluster +from icolos.core.workflow_steps.gromacs.editconf import StepGMXEditConf +from icolos.core.workflow_steps.gromacs.genion import StepGMXGenion +from icolos.core.workflow_steps.gromacs.grompp import StepGMXGrompp +from icolos.core.workflow_steps.gromacs.mdrun import StepGMXMDrun +from icolos.core.workflow_steps.gromacs.pdb2gmx import StepGMXPdb2gmx +from icolos.core.workflow_steps.gromacs.solvate import StepGMXSolvate +from icolos.core.workflow_steps.gromacs.trjconv import StepGMXTrjconv +from icolos.core.workflow_steps.gromacs.clusters_ts import StepClusterTS +from icolos.core.workflow_steps.gromacs.rsmd import StepGMXrmsd diff --git a/icolos/core/workflow_steps/gromacs/base.py b/icolos/core/workflow_steps/gromacs/base.py new file mode 100644 index 0000000..06a49bd --- /dev/null +++ b/icolos/core/workflow_steps/gromacs/base.py @@ -0,0 +1,195 @@ +from icolos.core.containers.generic import GenericData +from icolos.utils.enums.execution_enums import ExecutionResourceEnum +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from pydantic import BaseModel +import os +from typing import List +from icolos.core.workflow_steps.step import StepBase +from icolos.core.workflow_steps.step import _LE +import re +from copy import deepcopy +from distutils.dir_util import copy_tree +from icolos.utils.enums.program_parameters import GromacsEnum +from icolos.utils.execute_external.batch_executor import BatchExecutor +from icolos.utils.execute_external.gromacs import GromacsExecutor + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum +_GE = GromacsEnum() +_ERE = ExecutionResourceEnum + + +class StepGromacsBase(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + def _write_input_files(self, tmp_dir): + + # Normally this should be handled by setting GMXLIB env variable, but for some programs (gmx_MMPBSA), this doesn't work and non-standard forcefields + # need to be in the working directory + if _SGE.FORCEFIELD in self.settings.additional: + copy_tree( + self.settings.additional[_SGE.FORCEFIELD], + os.path.join( + tmp_dir, self.settings.additional[_SGE.FORCEFIELD].split("/")[-1] + ), + ) + self._logger.log( + f"Copied forcefield at {self.settings.additional[_SGE.FORCEFIELD]} to the working " + f"directory at {tmp_dir}", + _LE.INFO, + ) + + self._logger.log( + f"Writing input files to working directory at {tmp_dir}", _LE.DEBUG + ) + for file in self.data.generic.get_flattened_files(): + file.write(tmp_dir) + + def _parse_arguments(self, flag_dict: dict, args: list = None) -> List: + arguments = args if args is not None else [] + # first add the settings from the command line + for key in self.settings.arguments.parameters.keys(): + arguments.append(key) + arguments.append(str(self.settings.arguments.parameters[key])) + 
for flag in self.settings.arguments.flags: + arguments.append(str(flag)) + for key, value in flag_dict.items(): + # only add defaults if they have not been specified in the json + if key not in arguments: + arguments.append(key) + arguments.append(value) + return arguments + + def _copy_fields_dict(self): + try: + update_dictionary = deepcopy(self.settings.additional[_SGE.FIELDS]) + return update_dictionary + except KeyError: + self._logger.log( + "Update dictionary not present, will use provided mdp file without further modification", + _LE.WARNING, + ) + return {} + + def generate_output_file(self, in_file): + parts = in_file.split(".") + return parts[0] + "_out" + "." + parts[1] + + def _modify_config_file( + self, tmp_dir: str, config_file: GenericData, update_dict: dict + ): + file_data = config_file.get_data() + for key, value in update_dict.items(): + pattern = fr"({key})(\s*=\s*)[a-zA-Z0-9\s\_]*(\s*;)" + pattern = re.compile(pattern) + matches = re.findall(pattern, file_data) + if len(matches) == 0: + self._logger.log( + f"Specified key {key} was not found in the mdp file, value was not changed!", + _LE.WARNING, + ) + else: + + file_data = re.sub(pattern, fr"\1\2 {value} \3", file_data) + self._logger.log( + f"Replaced field {key} of mdp file with value {value}", _LE.DEBUG + ) + self._logger.log(f"Final MDP file for step {self.step_id} is: ", _LE.DEBUG) + for line in file_data.split("\n"): + self._logger_blank.log(line, _LE.DEBUG) + config_file.set_data(file_data) + config_file.write(tmp_dir) + + def _generate_index_groups(self, tmp_dir): + try: + + structure = [ + f for f in os.listdir(tmp_dir) if f.endswith(_SGE.FIELD_KEY_STRUCTURE) + ] + assert len(structure) == 1 + structure = structure[0] + except AssertionError: + structure = [ + f for f in os.listdir(tmp_dir) if f.endswith(_SGE.FIELD_KEY_TPR) + ] + structure = structure[0] + + args = ["-f", structure] + ndx_list = [f for f in os.listdir(tmp_dir) if f.endswith(_SGE.FIELD_KEY_NDX)] + if len(ndx_list) == 1: + args.extend(["-n", ndx_list[0]]) + result = self._backend_executor.execute( + command=_GE.MAKE_NDX, + arguments=args, + location=tmp_dir, + check=True, + pipe_input='echo -e "q"', + ) + return result + + def construct_pipe_arguments(self, tmp_dir, params) -> str: + """ + Constructs the pipe arguments to be passed to gromacs interactive programs + """ + # look up the groups that have been passed, try to identify the group number in the corresponding index file + + result = self._generate_index_groups(tmp_dir) + output = ['echo -e "'] + for param in params.split(): + if param == "or": + output.append("|") + elif param == "and": + output.append("&") + elif param == "not": + output.append("!") + elif param == ";": + output.append("\n") + else: + added_one = False + for line in result.stdout.split("\n"): + parts = line.split() + if param in parts and param == parts[1]: + idx = parts[0] + # print("found index", idx, f"for {param}") + added_one = True + output.append(idx) + break + if not added_one: + output.append(param) + output.append('\nq"') + self._logger.log(f"Constructed pipe input {' '.join(output)}", _LE.DEBUG) + return " ".join(output) + + def _add_index_group(self, tmp_dir, pipe_input): + ndx_args_2 = [ + "-f", + self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_STRUCTURE), + "-o", + os.path.join(tmp_dir, _SGE.STD_INDEX), + ] + self._logger.log( + f"Added group to index file using command {pipe_input}", + _LE.DEBUG, + ) + result = self._backend_executor.execute( + command=_GE.MAKE_NDX, + arguments=ndx_args_2, 
+                location=tmp_dir,
+                check=True,
+                pipe_input=self.construct_pipe_arguments(tmp_dir, pipe_input),
+            )
+            for line in result.stdout.split("\n"):
+                self._logger_blank.log(line, _LE.INFO)
+
+    def _get_gromacs_executor(self):
+        # return either the GromacsExecutor or the BatchExecutor, depending on the job's running mode
+
+        if self.execution.resource == _ERE.LOCAL:
+            return GromacsExecutor
+        elif self.execution.resource == _ERE.SLURM:
+            return BatchExecutor
+        else:
+            raise TypeError(
+                f"Execution resource type {self.execution.resource} not recognised",
+            )
diff --git a/icolos/core/workflow_steps/gromacs/cluster.py b/icolos/core/workflow_steps/gromacs/cluster.py
new file mode 100644
index 0000000..0c67453
--- /dev/null
+++ b/icolos/core/workflow_steps/gromacs/cluster.py
@@ -0,0 +1,81 @@
+from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum
+from icolos.utils.enums.program_parameters import GromacsEnum
+from icolos.core.workflow_steps.gromacs.base import StepGromacsBase
+from icolos.utils.execute_external.gromacs import GromacsExecutor
+from pydantic import BaseModel
+from icolos.core.workflow_steps.step import _LE
+import os
+
+_GE = GromacsEnum()
+_SGE = StepGromacsEnum()
+_SBE = StepBaseEnum
+
+
+class StepGMXCluster(StepGromacsBase, BaseModel):
+    """
+    Execute gmx cluster on a trajectory
+    """
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        self._initialize_backend(executor=GromacsExecutor)
+        self._check_backend_availability()
+
+    def execute(self):
+        tmp_dir = self._make_tmpdir()
+        self._write_input_files(tmp_dir)
+
+        # give the option to run a make_ndx step preceding clustering to facilitate clustering on custom groups
+        if _SGE.INDEX_FLAG in self.settings.arguments.parameters.keys():
+            assert (
+                _SGE.STD_INDEX in os.listdir(tmp_dir)
+                or self.settings.additional[_SGE.MAKE_NDX_COMMAND] is not None
+            )
+            if _SGE.STD_INDEX not in os.listdir(tmp_dir):
+                try:
+                    ndx_arguments = [
+                        "-f",
+                        self.data.generic.get_argument_by_extension(
+                            _SGE.FIELD_KEY_STRUCTURE
+                        ),
+                        "-o",
+                        _SGE.STD_INDEX,
+                    ]
+                    result = self._backend_executor.execute(
+                        command=_GE.MAKE_NDX,
+                        arguments=ndx_arguments,
+                        location=tmp_dir,
+                        check=True,
+                        pipe_input=self.construct_pipe_arguments(
+                            tmp_dir, self.settings.additional[_SGE.MAKE_NDX_COMMAND]
+                        ),
+                    )
+
+                except KeyError:
+                    raise KeyError(
+                        "If the index flag was specified, you must provide the ndx command in additional "
+                        "settings"
+                    )
+
+        flag_dict = {
+            "-s": self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TPR),
+            "-f": self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_XTC),
+            "-cl": "clusters.pdb",
+        }
+        arguments = self._parse_arguments(flag_dict=flag_dict)
+
+        result = self._backend_executor.execute(
+            command=_GE.CLUSTER,
+            arguments=arguments,
+            location=tmp_dir,
+            check=True,
+            pipe_input=self.construct_pipe_arguments(
+                tmp_dir, self.settings.additional[_SBE.PIPE_INPUT]
+            ),
+        )
+
+        for line in result.stdout.split("\n"):
+            self._logger_blank.log(line, _LE.DEBUG)
+        self._parse_output(tmp_dir)
+        self._remove_temporary(tmp_dir)
diff --git a/icolos/core/workflow_steps/gromacs/clusters_ts.py b/icolos/core/workflow_steps/gromacs/clusters_ts.py
new file mode 100644
index 0000000..6df4c9f
--- /dev/null
+++ b/icolos/core/workflow_steps/gromacs/clusters_ts.py
@@ -0,0 +1,88 @@
+from icolos.core.workflow_steps.gromacs.base import StepGromacsBase
+from icolos.utils.enums.step_enums import StepGromacsEnum
+from typing import List
+from pydantic import BaseModel
+from icolos.utils.execute_external.execute import Executor
+from icolos.core.workflow_steps.step import _LE
+from icolos.utils.enums.program_parameters import GromacsEnum
+import os
+import sys
+
+_SGE = StepGromacsEnum()
+_GE = GromacsEnum()
+
+
+class StepClusterTS(StepGromacsBase, BaseModel):
+    """
+    Generate time-resolved cluster plots from the output of gmx cluster; relies on the MDplot R package
+    """
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        self._initialize_backend(executor=Executor)
+
+    def _construct_args(self, defaults: dict) -> List:
+        """
+        Custom method for argument construction, includes checks for required args in the config.
+        """
+        args = []
+        for key, value in self.settings.arguments.parameters.items():
+            args.append("".join([key, "=", value]))
+
+        for value in self.settings.arguments.flags:
+            args.append(value)
+        for key, value in defaults.items():
+            if key not in self.settings.arguments.parameters.keys():
+                args.append("".join([key, "=", value]))
+
+        # do some checks to make sure the required params have been passed
+        for arg in [_SGE.CLUSTERS_NUMBER, _SGE.LENGTHS]:
+            if arg not in self.settings.arguments.parameters.keys():
+                self._logger.log(
+                    f"Argument for parameter {arg} not found in the provided arguments. \
+                    This must be specified! If this workflow has attached stdin, \
+                    you can enter the value now...",
+                    _LE.WARNING,
+                )
+                # instead of bailing out, take input from the user if the process has stdin connected
+                if sys.stdin and sys.stdin.isatty():
+                    value = input(f"Provide the parameter for option {arg}>>>")
+                    args.append("".join([arg, "=", value]))
+                else:
+                    self._logger.log(
+                        f"No stdin stream detected, and cannot infer argument, step {self.step_id} may fail",
+                        _LE.WARNING,
+                    )
+        return args
+
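+    # For illustration: with the defaults used in execute() below, the assembled
+    # argument list looks roughly like
+    #   ["files=/tmp/xyz/clust-id.xvg", "size=1500,1500", "outformat=png",
+    #    "outfile=clusters_ts.png", "timeUnit=ns", "title=CLUSTERS_timeseries", ...]
+    # plus any user-supplied key=value parameters and the required clusters/lengths options.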
+    def execute(self):
+        """
+        Visualise time-resolved gmx cluster results.
+        Requires a preceding gmx_cluster step producing a clust-id.xvg file
+        (ensure the -clid flag is set, and that the xvg file is passed to this step)
+        """
+
+        tmp_dir = self._make_tmpdir()
+        self.data.generic.write_out_all_files(tmp_dir)
+        xvg_file = self.data.generic.get_argument_by_extension(ext="xvg")
+
+        arguments = self._construct_args(
+            defaults={
+                "files": os.path.join(tmp_dir, xvg_file),
+                "size": "1500,1500",
+                "outformat": "png",
+                "outfile": "clusters_ts.png",
+                "timeUnit": "ns",
+                "title": "CLUSTERS_timeseries",
+            },
+        )
+
+        self._backend_executor.execute(
+            command=_GE.CLUSTER_TS, arguments=arguments, location=tmp_dir, check=True
+        )
+
+        self._parse_output(tmp_dir)
+        self._remove_temporary(tmp_dir)
diff --git a/icolos/core/workflow_steps/gromacs/do_dssp.py b/icolos/core/workflow_steps/gromacs/do_dssp.py
new file mode 100644
index 0000000..825cd92
--- /dev/null
+++ b/icolos/core/workflow_steps/gromacs/do_dssp.py
@@ -0,0 +1,57 @@
+from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum
+from icolos.utils.enums.program_parameters import GromacsEnum
+from icolos.utils.execute_external.gromacs import GromacsExecutor
+from pydantic import BaseModel
+from icolos.core.workflow_steps.step import _LE
+from icolos.core.workflow_steps.gromacs.base import StepGromacsBase
+
+_GE = GromacsEnum()
+_SGE = StepGromacsEnum()
+_SBE = StepBaseEnum
+
+
+class StepGMXDoDSSP(StepGromacsBase, BaseModel):
+    """
+    Wrapper for the gmx do_dssp binary, which in turn wraps dssp;
+    returns secondary structure data for the provided GROMACS trajectory
+    """
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        self._initialize_backend(executor=GromacsExecutor)
+        self._check_backend_availability()
+
+    def execute(self):
+        tmp_dir = self._make_tmpdir()
+        self._write_input_files(tmp_dir)
+
+        structure_file = self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TPR)
+        traj_file = self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_XTC)
+        arguments = self._parse_arguments(
+            flag_dict={"-f": traj_file, "-s": structure_file, "-ssdump": "info.dat"}
+        )
+
+        pipe_input = (
+            self.construct_pipe_arguments(
+                tmp_dir, self.settings.additional[_SBE.PIPE_INPUT]
+            )
+            if _SBE.PIPE_INPUT in self.settings.additional.keys()
+            and self.settings.additional[_SBE.PIPE_INPUT] is not None
+            else None
+        )
+
+        result = self._backend_executor.execute(
+            command=_GE.DO_DSSP,
+            arguments=arguments,
+            location=tmp_dir,
+            pipe_input=pipe_input,
+        )
+        for line in result.stdout.split("\n"):
+            self._logger_blank.log(line, _LE.DEBUG)
+        self._logger.log(
+            f"Completed execution for {self.step_id} successfully", _LE.INFO
+        )
+
+        self._parse_output(tmp_dir)
+        self._remove_temporary(tmp_dir)
diff --git a/icolos/core/workflow_steps/gromacs/editconf.py b/icolos/core/workflow_steps/gromacs/editconf.py
new file mode 100644
index 0000000..c522c13
--- /dev/null
+++ b/icolos/core/workflow_steps/gromacs/editconf.py
@@ -0,0 +1,57 @@
+from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum
+from icolos.utils.enums.program_parameters import GromacsEnum
+from icolos.utils.execute_external.gromacs import GromacsExecutor
+from pydantic import BaseModel
+from icolos.core.workflow_steps.step import _LE
+from icolos.core.workflow_steps.gromacs.base import StepGromacsBase
+
+_GE = GromacsEnum()
+_SGE = StepGromacsEnum()
+_SBE = StepBaseEnum
+
+
+class StepGMXEditConf(StepGromacsBase, BaseModel):
+    """
+    Wrapper for gmx editconf
+    """
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
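+        # The pipe_input mechanism (construct_pipe_arguments in StepGromacsBase) turns a
+        # readable group expression into the interactive input gmx expects, e.g.
+        #   "Protein or Other"  ->  'echo -e "1 | 13 \nq"'
+        # (group numbers depend on the generated index file, so this is purely illustrative)
+        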
self._initialize_backend(executor=GromacsExecutor) + self._check_backend_availability() + + def execute(self): + tmp_dir = self._make_tmpdir() + self._write_input_files(tmp_dir) + + structure_file = self.data.generic.get_argument_by_extension( + _SGE.FIELD_KEY_STRUCTURE + ) + arguments = self._parse_arguments( + flag_dict={"-f": structure_file, "-o": structure_file} + ) + + pipe_input = ( + self.construct_pipe_arguments( + tmp_dir, self.settings.additional[_SBE.PIPE_INPUT] + ) + if _SBE.PIPE_INPUT in self.settings.additional.keys() + and self.settings.additional[_SBE.PIPE_INPUT] is not None + else None + ) + + result = self._backend_executor.execute( + command=_GE.EDITCONF, + arguments=arguments, + location=tmp_dir, + pipe_input=pipe_input, + ) + for line in result.stdout.split("\n"): + self._logger_blank.log(line, _LE.DEBUG) + self._logger.log( + f"Completed execution for {self.step_id} successfully", _LE.INFO + ) + + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/gromacs/genion.py b/icolos/core/workflow_steps/gromacs/genion.py new file mode 100644 index 0000000..688e64a --- /dev/null +++ b/icolos/core/workflow_steps/gromacs/genion.py @@ -0,0 +1,69 @@ +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from icolos.utils.enums.program_parameters import GromacsEnum +from icolos.core.workflow_steps.gromacs.base import StepGromacsBase +from icolos.utils.execute_external.gromacs import GromacsExecutor +from pydantic import BaseModel +from icolos.core.workflow_steps.step import _LE +import os + +_GE = GromacsEnum() +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class StepGMXGenion(StepGromacsBase, BaseModel): + """ + Wrapper for gmx genion + """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=GromacsExecutor) + self._check_backend_availability() + + def execute(self): + tmp_dir = self._make_tmpdir() + self._write_input_files(tmp_dir) + arguments = self._parse_arguments( + { + # input file paths are handled internally + "-o": _SGE.STD_STRUCTURE, + "-p": self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TOPOL), + "-s": self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TPR), + } + ) + result = self._backend_executor.execute( + command=_GE.GENION, + arguments=arguments, + location=tmp_dir, + pipe_input=self.construct_pipe_arguments( + tmp_dir, self.settings.additional[_SBE.PIPE_INPUT] + ), + ) + for line in result.stdout.split("\n"): + self._logger_blank.log(line, _LE.DEBUG) + self._logger.log( + f"Completed execution for {self.step_id} successfully", _LE.INFO + ) + # this is the last structural change to the topology in a regular gromacs setup, + # update the index groups here + make_ndx_args = ["-f", _SGE.STD_STRUCTURE, "-o", _SGE.STD_INDEX] + index_files = [f for f in os.listdir(tmp_dir) if f.endswith(".ndx")] + # remove any existing index files + for f in index_files: + self._remove_temporary(os.path.join(tmp_dir, f)) + # generate new index file + result = self._backend_executor.execute( + command=_GE.MAKE_NDX, + arguments=make_ndx_args, + location=tmp_dir, + check=True, + pipe_input='echo -e "1 | 12 \nq"', + ) + for line in result.stdout.split("\n"): + self._logger_blank.log(line, _LE.DEBUG) + + self._logger.log('Added index group to "index.ndx"', _LE.DEBUG) + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/gromacs/grompp.py b/icolos/core/workflow_steps/gromacs/grompp.py new file mode 100644 
index 0000000..1e5508d
--- /dev/null
+++ b/icolos/core/workflow_steps/gromacs/grompp.py
@@ -0,0 +1,125 @@
+from icolos.utils.enums.step_enums import StepGromacsEnum
+from icolos.utils.enums.program_parameters import GromacsEnum
+from icolos.core.workflow_steps.gromacs.base import StepGromacsBase
+from icolos.utils.execute_external.gromacs import GromacsExecutor
+from pydantic import BaseModel
+from icolos.core.workflow_steps.step import _LE
+
+_GE = GromacsEnum()
+_SGE = StepGromacsEnum()
+
+
+class StepGMXGrompp(StepGromacsBase, BaseModel):
+    """
+    Wraps the gromacs preprocessor, produces the tpr file preceding the mdrun step
+    Automatically handles coupling group updates
+    """
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        self._initialize_backend(executor=GromacsExecutor)
+        self._check_backend_availability()
+
+    def _auto_update_coupling_groups(self, tmp_dir):
+        # this will handle most straightforward cases with protein+ligand, DNA, RNA
+        result = self._generate_index_groups(tmp_dir)
+        add_other = False
+        add_ions = False
+
+        # check whether the ions and other index groups are present
+        for line in result.stdout.split("\n"):
+            parts = line.split()
+
+            if len(parts) == 5:
+                if parts[1] in _GE.PRIMARY_COMPONENTS:
+                    primary_component = parts[1]
+                    # identify Protein, DNA, RNA
+                elif parts[1] == "Other":
+                    add_other = True
+                elif parts[1] == _SGE.WATER_AND_IONS:
+                    add_ions = True
+
+        update_dict = self._copy_fields_dict()
+        pipe_input = ""
+        tc_grps = ""
+        if add_other:
+            pipe_input += f"{primary_component} or Other"
+            tc_grps += f"{primary_component}_Other"
+        else:
+            tc_grps += primary_component
+        if add_ions:
+            tc_grps += " "
+            tc_grps += _SGE.WATER_AND_IONS
+        else:
+            tc_grps += " "
+            tc_grps += "Water"
+
+        update_dict[_SGE.TC_GRPS] = tc_grps
+
+        if pipe_input:
+            self._add_index_group(tmp_dir, pipe_input)
+
+        # update the mdp file with the modified coupling groups
+        self._modify_config_file(
+            tmp_dir,
+            self.data.generic.get_argument_by_extension(
+                _SGE.FIELD_KEY_MDP, rtn_file_object=True
+            ),
+            update_dict,
+        )
+
+    def execute(self):
+        """
+        Set up the required mdp file and run gmx grompp
+        Note that any issues with your parametrisation or system building will normally cause grompp to panic
+        """
+        tmp_dir = self._make_tmpdir()
+        self._write_input_files(tmp_dir)
+
+        # if a make_ndx command has been specified in settings.additional,
+        # add an index group here, commonly protein_ligand or protein_other
+        if (
+            _SGE.MAKE_NDX_COMMAND in self.settings.additional.keys()
+            and self.settings.additional[_SGE.MAKE_NDX_COMMAND] is not None
+        ):
+            # normally you want your two t-coupling groups to be something like Protein_Other Water_Ions;
+            # these can be added automatically with the "auto" keyword
+            if self.settings.additional[_SGE.MAKE_NDX_COMMAND] == _SGE.AUTO:
+                # automatically update the coupling groups: check for the presence of 'ions' and 'other',
+                # then update the default coupling groups in the mdp file
+                self._auto_update_coupling_groups(tmp_dir)
+            else:
+                # the mdp file will not be modified; coupling groups must be set correctly prior to job execution
+                self._add_index_group(
+                    tmp_dir, self.settings.additional[_SGE.MAKE_NDX_COMMAND]
+                )
+
+        structure_file = self.data.generic.get_argument_by_extension(
+            _SGE.FIELD_KEY_STRUCTURE
+        )
+        mdp_file = self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_MDP)
+        topol_file = self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TOPOL)
+
+        args = ["-r", structure_file] if self.settings.additional.get("-r") else []
+
+        arguments = self._parse_arguments(
+ flag_dict={ + "-f": mdp_file, + "-c": structure_file, + "-p": topol_file, + "-o": _SGE.STD_TPR, + }, + args=args, + ) + result = self._backend_executor.execute( + command=_GE.GROMPP, arguments=arguments, check=True, location=tmp_dir + ) + for line in result.stdout.split("\n"): + self._logger_blank.log(line, _LE.DEBUG) + self._logger.log( + f"Completed execution for {self.step_id} successfully", _LE.INFO + ) + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/gromacs/mdrun.py b/icolos/core/workflow_steps/gromacs/mdrun.py new file mode 100644 index 0000000..0739fd8 --- /dev/null +++ b/icolos/core/workflow_steps/gromacs/mdrun.py @@ -0,0 +1,67 @@ +from icolos.utils.enums.step_enums import StepGromacsEnum +from icolos.utils.enums.program_parameters import GromacsEnum +from icolos.core.workflow_steps.gromacs.base import StepGromacsBase +from pydantic import BaseModel +from icolos.core.workflow_steps.step import _LE +import os + +_GE = GromacsEnum() +_SGE = StepGromacsEnum() + + +class StepGMXMDrun(StepGromacsBase, BaseModel): + """ + Launch gmx mdrun + """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=self._get_gromacs_executor()) + self._check_backend_availability() + + def _get_log_file(self, tmp_dir): + """ + Find and parse the log file + """ + log_file = [f for f in os.listdir(tmp_dir) if f.endswith(".log")] + assert len(log_file) == 1 + with open(os.path.join(tmp_dir, log_file[0]), "r") as f: + data = f.readlines() + return data + + def _tail_log_file(self, tmp_dir): + """ + Log the last 50 lines of the log file to capture performance metrics from the run + + """ + log_file = self._get_log_file(tmp_dir) + + for line in log_file[-50:]: + self._logger_blank.log(line, _LE.INFO) + + def execute(self): + + tmp_dir = self._make_tmpdir() + # if we're simulating a protein, we need to modify the topol file to include the correct index groups \ + # to allow ligand restraint. 
This means an ndx file must be specified in the json + self._write_input_files(tmp_dir) + # append _out to the xtc file name + xtc_output_file = self.generate_output_file(_SGE.STD_XTC) + arguments = self._parse_arguments( + flag_dict={ + "-s": self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TPR), + "-c": _SGE.STD_STRUCTURE, + "-x": xtc_output_file, + } + ) + self._backend_executor.execute( + command=_GE.MDRUN, arguments=arguments, location=tmp_dir, check=True + ) + + self._tail_log_file(tmp_dir) + self._logger.log( + f"Completed execution for {self.step_id} successfully", _LE.INFO + ) + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/gromacs/mmpbsa.py b/icolos/core/workflow_steps/gromacs/mmpbsa.py new file mode 100644 index 0000000..c1d97ab --- /dev/null +++ b/icolos/core/workflow_steps/gromacs/mmpbsa.py @@ -0,0 +1,161 @@ +from subprocess import CompletedProcess +from icolos.core.containers.generic import GenericData +from typing import AnyStr, List +from icolos.core.workflow_steps.gromacs.base import StepGromacsBase +from pydantic import BaseModel +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from icolos.core.workflow_steps.step import _LE +from icolos.utils.enums.program_parameters import GromacsEnum +import os +from icolos.utils.general.files_paths import attach_root_path + +_SGE = StepGromacsEnum() +_GE = GromacsEnum() +_SBE = StepBaseEnum + + +class StepGMXmmpbsa(StepGromacsBase, BaseModel): + """ + Execute gmx_MMPBSA, calculates binding free energy of + protein-ligand complex using single trajectory approximation, + using Amber's mmpbsa.py script + """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(self._get_gromacs_executor()) + self._check_backend_availability() + + def _get_arg(self, ext) -> AnyStr: + return self.data.generic.get_argument_by_extension(ext) + + def _generate_amber_input_file(self) -> None: + input_file = ( + self.settings.additional[_SGE.INPUT_FILE] + if _SGE.INPUT_FILE in self.settings.additional.keys() + else None + ) + # Normally the user should provide an input file to control the mmgbsa protocol + if input_file is not None and os.path.isfile(input_file): + self._logger.log( + f"Using provided AMBER input file at {self.settings.additional[_SGE.INPUT_FILE]}", + _LE.DEBUG, + ) + with open(input_file, "r") as f: + template = GenericData(file_name="mmpbsa.in", file_data=f.read()) + else: + self._logger.log("No input file found, defaulting to template", _LE.WARNING) + # parses user arguments and creates the formatted amber input file from the user specification + with open( + attach_root_path("icolos/config/amber/default_mmpbsa.in"), "r" + ) as f: + template = GenericData(file_name="mmpbsa.in", file_data=f.read()) + + self.data.generic.add_file(template) + + def _parse_arguments(self, flag_dict: dict) -> List: + args = [] + for flag in self.settings.arguments.flags: + if flag != "-O": + args.append(flag) + for key, value in self.settings.arguments.parameters.items(): + args.append(key) + args.append(value) + for key, value in flag_dict.items(): + if key not in args: + args.append(key) + args.append(value) + + # capture output + return args + + def _run_mmpbsa(self, args, tmp_dir) -> CompletedProcess: + command = _GE.MMPBSA + self._logger.log(f"Executing mmgbsa calculation in dir {tmp_dir}", _LE.DEBUG) + result = self._backend_executor.execute( + command=command, arguments=args, check=True, location=tmp_dir + ) + for line in 
result.stdout.split("\n"): + self._logger_blank.log(line, _LE.INFO) + for line in result.stderr.split("\n"): + self._logger_blank.log(line, _LE.INFO) + + return result + + def _parse_coupling_groups(self, tmp_dir) -> AnyStr: + # parse the coupling groups to their indexes + output = [] + pipe_input = self.settings.additional[_SGE.COUPLING_GROUPS] + + structure = self.data.generic.get_argument_by_extension( + _SGE.FIELD_KEY_STRUCTURE + ) + arguments = ["-f", structure] + if [f for f in os.listdir(tmp_dir) if f.endswith("ndx")]: + arguments.extend(["-n", "index.ndx"]) + else: + arguments.extend(["-o", "index.ndx"]) + + result = self._backend_executor.execute( + command=_GE.MAKE_NDX, + arguments=arguments, + location=tmp_dir, + check=True, + pipe_input='echo -e "q"', + ) + for param in pipe_input.split(): + for line in result.stdout.split("\n"): + parts = line.split() + if param in line and parts[1] == param: + output.append(parts[0]) + break + self._logger.log(f"Resolved coupling groups {output}", _LE.DEBUG) + return " ".join(output) + + def _get_file_from_dir(self, tmp_dir: str, ext: str) -> AnyStr: + file = [f for f in os.listdir(tmp_dir) if f.endswith(ext)] + assert len(file) == 1 + return file[0] + + def execute(self) -> None: + """ + Execute gmx_MMPBSA + Note: execution using mpirun is not supported for stability reasons + """ + tmp_dir = self._make_tmpdir() + + self._generate_amber_input_file() + self._write_input_files(tmp_dir) + + # gmx_MMPBSA requires the coupling groups of the receptor and ligand + + # form any required coupling groups with make_ndx_command before parsing coupling groups + # e.g. combine protein + cofactor + ndx_commands = ( + self.settings.additional[_SGE.MAKE_NDX_COMMAND] + if _SGE.MAKE_NDX_COMMAND in self.settings.additional.keys() + else None + ) + if ndx_commands is not None: + # can run make_ndx multiple times for complex cases, each set of pipe imput must be separated by a semicolon + for args in ndx_commands.split(";"): + self._add_index_group(tmp_dir=tmp_dir, pipe_input=args) + flag_dict = { + "-i": _SGE.MMPBSA_IN, + "-cs": self._get_arg("tpr"), + "-cg": self._parse_coupling_groups(tmp_dir), + "-ci": self._get_file_from_dir(tmp_dir=tmp_dir, ext="ndx"), + "-ct": self._get_arg("xtc"), + "-cp": self._get_arg("top"), + # do not attempt to open the results in the GUI afterwards + "-nogui": "", + } + + flag_list = self._parse_arguments(flag_dict=flag_dict) + + result = self._run_mmpbsa(flag_list, tmp_dir) + + # parse and delete generated output + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/gromacs/pdb2gmx.py b/icolos/core/workflow_steps/gromacs/pdb2gmx.py new file mode 100644 index 0000000..bc8a33e --- /dev/null +++ b/icolos/core/workflow_steps/gromacs/pdb2gmx.py @@ -0,0 +1,455 @@ +from icolos.utils.enums.program_parameters import ( + GromacsEnum, +) +from icolos.utils.enums.step_enums import StepGromacsEnum +from pydantic import BaseModel +from icolos.core.workflow_steps.gromacs.base import StepGromacsBase +from icolos.utils.execute_external.gromacs import GromacsExecutor +from icolos.utils.execute_external.execute import Executor +from icolos.utils.execute_external.schrodinger import SchrodingerExecutor +from icolos.core.workflow_steps.step import _LE +import os +import re +from typing import AnyStr, List +from string import ascii_uppercase + +_SGE = StepGromacsEnum() +_GE = GromacsEnum() + + +class StepGMXPdb2gmx(StepGromacsBase, BaseModel): + _shell_executor: Executor = None + _antechamber_executor: 
Executor = None
+    _acpype_executor: Executor = None
+    _schrodinger_executor: SchrodingerExecutor = None
+
+    def __init__(self, **data):
+        """
+        Executes system parametrisation for gromacs MD setup
+        Generates GAFF params for unknown components with Antechamber
+        """
+        super().__init__(**data)
+
+        self._initialize_backend(executor=GromacsExecutor)
+        self._check_backend_availability()
+        self._shell_executor = Executor()
+        self._antechamber_executor = Executor(prefix_execution=_SGE.AMBERTOOLS_LOAD)
+
+    def _modify_topol_file(self, tmp_dir, itp_files):
+        # read in the complex topol file, add the new itp files after the forcefield #include statement
+        with open(os.path.join(tmp_dir, _SGE.COMPLEX_TOP), "r") as f:
+            lines = f.readlines()
+        index = [idx for idx, s in enumerate(lines) if _SGE.FORCEFIELD_ITP in s][0]
+        new_topol = lines[: index + 1]
+        for file in itp_files:
+            new_topol.append(f'#include "{file}"\n')
+        for line in lines[index + 1 :]:
+            new_topol.append(line)
+        for file in itp_files:
+            stub = file.split(".")[0]
+            new_topol.append(f"{stub} 1\n")
+        with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "w") as f:
+            f.writelines(new_topol)
+
+        # remove all but the final topol file from the paths, makes file handling cleaner later
+        top_files = [
+            f for f in os.listdir(tmp_dir) if f.endswith("top") and f != _SGE.STD_TOPOL
+        ]
+
+        for f in top_files:
+            os.remove(os.path.join(tmp_dir, f))
+
+    def _add_posre_to_topol(self, tmp_dir, lig):
+        """
+        Add lines to the topol file to invoke positional restraints for the parametrised ligands
+        """
+        stub = lig.split(".")[0]
+        lig_itp = stub + ".itp"
+        with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "r") as f:
+            lines = f.readlines()
+        index = [idx for idx, s in enumerate(lines) if lig_itp in s][0]
+        new_topol = lines[: index + 1]
+        # the included file name must be quoted for the gromacs preprocessor
+        new_topol.append(
+            f'#ifdef POSRES_{stub.upper()}\n#include "posre_{stub}.itp"\n#endif\n'
+        )
+        for line in lines[index + 1 :]:
+            new_topol.append(line)
+
+        with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "w") as f:
+            f.writelines(new_topol)
+
+    def _split_protein_ligand_complex(self, tmp_dir):
+        # split the file into protein and an arbitrary number of ligands and cofactors;
+        # handles multiple cofactors of the same type
+        struct_file = [
+            file for file in os.listdir(tmp_dir) if file.endswith(_SGE.FIELD_KEY_PDB)
+        ][0]
+        with open(os.path.join(tmp_dir, struct_file), "r") as f:
+            data = f.readlines()
+        # handles an arbitrary number of ligands, cofactors, etc.
+        ligand_lines = {}
+        protein_lines = []
+
+        for line in data:
+            parts = line.upper().split()
+
+            # filter header lines etc.
+            if len(parts) > 4 and parts[0] in _GE.ATOMS:
+
+                # catch the easy cases where there is a direct match against the internal dict of parametrised components
+                if (
+                    parts[3] in _GE.AMBER_PARAMETRISED_COMPONENTS
+                    or parts[3] in _GE.IONS
+                ):
+
+                    protein_lines.append(line)
+
+                # catch cases where ions have non-standard residue names e.g.
NA3 + elif parts[3][:2] in _GE.IONS and re.findall( + re.compile(fr"{parts[3][:2]}[0-9]+"), line + ): + + pattern = fr"{parts[3][:2]}[0-9]+" + pattern = re.compile(pattern) + + line = re.sub(pattern, parts[3][:2], line) + protein_lines.append(line) + + else: + # component is not parametrised, add to the ligands + if parts[4] in list(ascii_uppercase): + try: + ligand_lines[f"{parts[3]}:{parts[5]}"].append(line) + except KeyError: + # ligand key not created yet, identify by chain + res num to handle multiple identical components + ligand_lines[f"{parts[3]}:{parts[5]}"] = [line] + else: # the 5th col index is the first coord col + try: + ligand_lines[f"{parts[3]}:{parts[4]}"].append(line) + except KeyError: + ligand_lines[f"{parts[3]}:{parts[4]}"] = [line] + + for key, value in ligand_lines.items(): + # write ligand components as separate pdb files + with open(os.path.join(tmp_dir, f"{key}.pdb"), "w") as f: + f.writelines(value) + with open(os.path.join(tmp_dir, _SGE.PROTEIN_PDB), "w") as f: + f.writelines(protein_lines) + self._remove_temporary(os.path.join(tmp_dir, struct_file)) + return list(ligand_lines.keys()) + + def _parametrisation_pipeline(self, tmp_dir, input_pdb) -> None: + """ + :param tmp_dir: step's base directory + :param input_pdb: file name for the ligand being parametrised + """ + # main pipeline for producing GAFF parameters for a ligand + stub = input_pdb.split(".")[0] + output_file = stub + ".mol2" + arguments_antechamber = [ + "-i", + input_pdb, + "-o", + output_file, + "-fi", + "pdb", + "-fo", + "mol2", + "-c", + "gas", + ] + self._logger.log(f"Running antechamber on structure {input_pdb}", _LE.DEBUG) + self._antechamber_executor.execute( + command=_GE.ANTECHAMBER, + arguments=arguments_antechamber, + check=True, + location=tmp_dir, + ) + + # Step 4: run the acpype script to generate the ligand topology file for GAFF + self._logger.log(f"Running acpype on structure {input_pdb}", _LE.DEBUG) + arguments_acpype = [ + os.path.join(_GE.ACPYPE_PATH, _GE.ACPYPE_BINARY), + "-di", + output_file, + "-c", + "gas", + ] + self._antechamber_executor.execute( + command=_GE.PYTHON, arguments=arguments_acpype, location=tmp_dir, check=True + ) + # produce the ndx file for genrestr later + index_file = stub + ".ndx" + ndx_arguments = ["-f", input_pdb, "-o", index_file] + + self._backend_executor.execute( + command=_GE.MAKE_NDX, + arguments=ndx_arguments, + location=tmp_dir, + check=True, + pipe_input='echo -e "0 & ! 
a H* \nq"', # all system heavy atoms, excl hydrogens + ) + # generate positional restraints for the ligand + genrestr_args = [ + "-f", + input_pdb, + "-n", + index_file, + "-o", + f"posre_{stub}.itp", + "-fc", + _SGE.FORCE_CONSTANTS, + ] + self._backend_executor.execute( + command=_GE.GENRESTR, + arguments=genrestr_args, + location=tmp_dir, + check=True, + pipe_input="echo 3", + ) # this will always be the last thing on the index file + + # we no longer need the ligand ndx file + self._remove_temporary(os.path.join(tmp_dir, index_file)) + + def _sort_components(self, lig_ids: List, components: List): + """ + Ensure components go back into the concatenated pdb file in the same order as the original + """ + new_components = [] + for idx in lig_ids: + for component in components: + if idx in component: + new_components.append(component) + return new_components + + def _concatenate_structures(self, tmp_dir: str, lig_ids: List): + """ + Extract newly parametrised components, concatenate everything into a single pdb file + """ + + components = [] + for root, _, files in os.walk(tmp_dir): + for file in files: + if file.endswith("_NEW.pdb"): + components.append(os.path.join(root, file)) + components = self._sort_components(lig_ids, components) + self._logger.log(f"Found components: {components}", _LE.DEBUG) + with open(os.path.join(tmp_dir, _SGE.PROTEIN_PDB), "r") as f: + pdb_lines = f.readlines() + + for file in components: + with open(file, "r") as f: + + pdb_lines.extend(f.readlines()) + + pdb_lines = [ + l for l in pdb_lines if not any(s in l for s in ["TER", "ENDMDL", "REMARK"]) + ] + pdb_lines.extend(["TER\n", "ENDMDL\n"]) + with open(os.path.join(tmp_dir, "Complex.pdb"), "w") as f: + f.writelines(pdb_lines) + + # also deal with renaming the itp files here + for root, _, files in os.walk(tmp_dir): + for item in files: + if ( + item.endswith("GMX.itp") + and _SGE.PROTEIN_TOP not in item + and os.path.join(root, item) != os.path.join(tmp_dir, item) + ): + os.rename( + os.path.join(root, item), + os.path.join(tmp_dir, item.split("_")[0]) + ".itp", + ) + # rename the protein top to complex + os.rename( + os.path.join(tmp_dir, _SGE.PROTEIN_TOP), + os.path.join(tmp_dir, _SGE.COMPLEX_TOP), + ) + + def _extract_atomtype(self, tmp_dir: str, file: str) -> List[AnyStr]: + """ + Pull the atomtype lines out of the topol file and return them as a list, write the sanitised itp file to directory + """ + with open(os.path.join(tmp_dir, file), "r") as f: + lines = f.readlines() + start_index = None + stop_index = None + for idx, line in enumerate(lines): + if _GE.ATOMTYPES in line: + start_index = idx + if _GE.MOLECULETYPES in line: + stop_index = idx + + selection = lines[start_index:stop_index] + # remove the offending lines from the topol + remaining = lines[:start_index] + remaining.extend(lines[stop_index:]) + self._remove_temporary(os.path.join(tmp_dir, file)) + with open(os.path.join(tmp_dir, file), "w") as f: + f.writelines(remaining) + return selection + + def _remove_duplicate_atomtypes(self, atomtypes: List): + output = [atomtypes[0]] + for line in atomtypes: + if line not in output: + output.append(line) + return output + + def _modify_itp_files(self, tmp_dir): + # cut the moleculetype directives out of all the individual itp files and add them to the top of the topol + atomtype_lines = [] + # read the topol file, identify all the itp files it is #including + with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "r") as f: + topol_lines = [ + l.split()[-1].strip('"') + for l in f.readlines() + if 
".itp" in l and "posre" not in l + ] + topol_lines = [l for l in topol_lines if l in os.listdir(tmp_dir)] + for file in topol_lines: + atomtype_lines.extend(self._extract_atomtype(tmp_dir, file)) + atomtype_lines = self._remove_duplicate_atomtypes(atomtype_lines) + + # write an 'atomtypes.itp' files to be included just below the forcefield, with all the atomtypes contained in the extra components + with open(os.path.join(tmp_dir, "atomtypes.itp"), "w") as f: + f.writelines(atomtype_lines) + + with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "r") as f: + lines = f.readlines() + self._remove_temporary(os.path.join(tmp_dir, _SGE.STD_TOPOL)) + index = [idx for idx, s in enumerate(lines) if _SGE.FORCEFIELD_ITP in s][0] + new_topol = lines[: index + 1] + + new_topol.append('#include "atomtypes.itp"\n') + new_topol.extend(lines[index + 1 :]) + with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "w") as f: + f.writelines(new_topol) + + def _modify_water_molecules(self, tmp_dir: str): + with open(os.path.join(tmp_dir, _SGE.COMPLEX_PDB), "r") as f: + lines = f.readlines() + + solvent = [] + # pick out the water lines + for line in lines: + if any([x in line for x in _GE.SOLVENTS]): + solvent.append(line) + for line in solvent: + lines.remove(line) + lines.extend(solvent) + for line in lines: + if any([x in line for x in _GE.TERMINATIONS]): + lines.remove(line) + + with open(os.path.join(tmp_dir, _SGE.COMPLEX_PDB), "w") as f: + f.writelines(lines) + + if solvent: + # modify the topol to put the solvent in last in the [ molecules ] directive + with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "r") as f: + lines = f.readlines() + molecule_idx = lines.index(_GE.MOLECULES) + for line in lines[molecule_idx:]: + if any([x in line for x in _GE.SOLVENTS]): + out = lines.pop(lines.index(line)) + lines.append(out) + with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "w") as f: + f.writelines(lines) + + def execute(self): + """Takes in a ligand pdb file and generates the required topology, based on the backend specified in the config json. 
+ Currently supported AnteChamber + + Execution looks like this currently: + (1) split the protein from the other components + (2) generate the topology for the protein separately + (3) identify components to be parametrised (cofactors, ligands etc) + (4) run the parametrisation pipeline on each component in serial (reasonably fast exec time per ligand) + (4a) store the resIDs of the ligands using the file handling system to be retrieved in a later step + (5) modify the topology file to add the #include statements for the relevant itp files + (6) convert the resulting concatenated pdb file to .gro with editconf + (7) add the posres stuff to the topol file for each ligand for the subsequent equilibration steps + (8) if more than one ligand, modify the itp files to ensure all moleculetype directives are specified first + """ + + tmp_dir = self._make_tmpdir() + self._write_input_files(tmp_dir) # dump generic data fields to the tmpdir + lig_ids = self._split_protein_ligand_complex(tmp_dir) + self._logger.log( + f"Parameters will be generated for the following components: {str(lig_ids)}", + _LE.DEBUG, + ) + + # Step 2: run pdb2gmx on the protein component only + + arguments_pdb2gmx = self._parse_arguments( + flag_dict={ + "-f": os.path.join(tmp_dir, _SGE.PROTEIN_PDB), + "-o": os.path.join(tmp_dir, _SGE.PROTEIN_PDB), + "-p": _SGE.PROTEIN_TOP, + } + ) + self._backend_executor.execute( + command=_GE.PDB2GMX, arguments=arguments_pdb2gmx, location=tmp_dir + ) + + for lig in lig_ids: + input_file = lig + ".pdb" + # generate the itp files for each component, named by their PDB identifier + self._parametrisation_pipeline(tmp_dir, input_file) + + # concatenate the structures to produce Complex.pdb + if lig_ids: + self._concatenate_structures(tmp_dir, lig_ids) + # step 6: Modify protein topol file for ligand + itp_files = [ + f + for f in os.listdir(tmp_dir) + if f.endswith(".itp") + and "posre" not in f + and not any( + [x in f for x in _GE.PRIMARY_COMPONENTS] + ) # avoid any duplicated itp file entries from components of the protein already handles by pdb2gmx (TODO: makes sure this works for DNA/RNA as well) + ] + # need to sort the itp files to match the ordering from the original pdb structure + itp_files = self._sort_components(lig_ids, itp_files) + self._modify_topol_file(tmp_dir, itp_files) + + # step 10: modify the topol file to add the ligand posre file if restraints are applied + for lig in lig_ids: + self._add_posre_to_topol(tmp_dir, lig) + + # if more than two ligands present, modify the ligand itp files so all the [atomtype] directives come before the [moleculetype] directives in the full topol + if len(lig_ids) > 1: + self._modify_itp_files(tmp_dir) + + else: + # just convert the file names in place, no addition of ligands + os.rename( + os.path.join(tmp_dir, _SGE.PROTEIN_TOP), + os.path.join(tmp_dir, _SGE.STD_TOPOL), + ) + os.rename( + os.path.join(tmp_dir, _SGE.PROTEIN_PDB), + os.path.join(tmp_dir, _SGE.COMPLEX_PDB), + ) + + # step 7: run editconf to convert the combined pdb to a gro file + + # do final check to move crystallographic waters to the end of the pdb file, after + # the ligand, to ensure continuous solvent group later + self._modify_water_molecules(tmp_dir) + # and adjust the topol file to put any solvent last + + editconf_arguments = ["-f", _SGE.COMPLEX_PDB, "-o", "structure.gro"] + self._backend_executor.execute( + command=_GE.EDITCONF, + arguments=editconf_arguments, + location=tmp_dir, + check=True, + ) + + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff 
--git a/icolos/core/workflow_steps/gromacs/rsmd.py b/icolos/core/workflow_steps/gromacs/rsmd.py
new file mode 100644
index 0000000..5ad224f
--- /dev/null
+++ b/icolos/core/workflow_steps/gromacs/rsmd.py
@@ -0,0 +1,59 @@
+from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum
+from icolos.utils.enums.program_parameters import GromacsEnum
+from icolos.core.workflow_steps.gromacs.base import StepGromacsBase
+from icolos.utils.execute_external.gromacs import GromacsExecutor
+from pydantic import BaseModel
+from icolos.core.workflow_steps.step import _LE
+import os
+
+_GE = GromacsEnum()
+_SGE = StepGromacsEnum()
+_SBE = StepBaseEnum
+
+
+class StepGMXrmsd(StepGromacsBase, BaseModel):
+    """
+    Run a gromacs rmsd calculation on a trajectory
+    """
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        self._initialize_backend(executor=GromacsExecutor)
+        self._check_backend_availability()
+
+    def execute(self):
+
+        tmp_dir = self._make_tmpdir()
+
+        # we receive a trajectory from a trjconv step, and a single structure to
+        # compare against; rmsd is computed for every frame of the trj file
+
+        # write out generic files
+        self._write_input_files(tmp_dir)
+
+        # conformer coming from a Compound object
+        conf = self._unroll_compounds(self.data.compounds)
+
+        conf = conf[0]
+        conf.write(os.path.join(tmp_dir, "reference.pdb"), format_="pdb")
+
+        flag_dict = {
+            "-s": "reference.pdb",
+            "-f": self.data.generic.get_argument_by_extension("xtc"),
+            "-fit": "rot+trans",
+        }
+
+        arguments = self._parse_arguments(flag_dict=flag_dict, args=["-w"])
+        result = self._backend_executor.execute(
+            command=_GE.RMS,
+            arguments=arguments,
+            location=tmp_dir,
+            check=True,
+            pipe_input='echo -e "2\n2\n"',
+        )
+        for line in result.stdout.split("\n"):
+            self._logger_blank.log(line, _LE.DEBUG)
+
+        self._parse_output(tmp_dir)
+        self._remove_temporary(tmp_dir)
diff --git a/icolos/core/workflow_steps/gromacs/solvate.py b/icolos/core/workflow_steps/gromacs/solvate.py
new file mode 100644
index 0000000..1f30d2f
--- /dev/null
+++ b/icolos/core/workflow_steps/gromacs/solvate.py
@@ -0,0 +1,46 @@
+from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum
+from icolos.utils.enums.program_parameters import GromacsEnum
+from icolos.utils.execute_external.gromacs import GromacsExecutor
+from pydantic import BaseModel
+from icolos.core.workflow_steps.step import _LE
+from icolos.core.workflow_steps.gromacs.base import StepGromacsBase
+
+_GE = GromacsEnum()
+_SGE = StepGromacsEnum()
+_SBE = StepBaseEnum
+
+
+class StepGMXSolvate(StepGromacsBase, BaseModel):
+    """
+    Fill the simulation box with solvent; executes gmx solvate
+    """
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        self._initialize_backend(executor=GromacsExecutor)
+        self._check_backend_availability()
+
+    def execute(self):
+        tmp_dir = self._make_tmpdir()
+        self._write_input_files(tmp_dir)
+        structure_file = self.data.generic.get_argument_by_extension(
+            _SGE.FIELD_KEY_STRUCTURE
+        )
+        arguments = self._parse_arguments(
+            flag_dict={
+                "-cp": structure_file,
+                "-p": self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TOPOL),
+                "-o": structure_file,
+            }
+        )
+        result = self._backend_executor.execute(
+            command=_GE.SOLVATE, arguments=arguments, location=tmp_dir
+        )
+        for line in result.stdout.split("\n"):
+            self._logger_blank.log(line, _LE.DEBUG)
+        self._logger.log(
+            f"Completed execution for {self.step_id} successfully.", _LE.INFO
+        )
+        self._parse_output(tmp_dir)
+        self._remove_temporary(tmp_dir)
diff --git
a/icolos/core/workflow_steps/gromacs/trajcat.py b/icolos/core/workflow_steps/gromacs/trajcat.py new file mode 100644 index 0000000..c2b5fc9 --- /dev/null +++ b/icolos/core/workflow_steps/gromacs/trajcat.py @@ -0,0 +1,54 @@ +from enum import Flag +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from icolos.utils.enums.program_parameters import GromacsEnum +from icolos.core.workflow_steps.gromacs.base import StepGromacsBase +from icolos.utils.execute_external.gromacs import GromacsExecutor +from pydantic import BaseModel +from icolos.core.workflow_steps.step import _LE +import os + +_GE = GromacsEnum() +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class StepGMXTrjcat(StepGromacsBase, BaseModel): + """ + Concatenates multiple trajectories, useful for subsequent rmsd/cluster calculations + """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=GromacsExecutor) + self._check_backend_availability() + + def execute(self): + + tmp_dir = self._make_tmpdir() + + # write the trajectories to the tmpdir, writing to separate file names, then glob the xtc files + + for idx, file in enumerate(self.data.generic.get_files_by_extension(ext="xtc")): + file.write(path=os.path.join(tmp_dir, f"traj_{idx}.xtc"), join=False) + + flag_dict = { + "-f": "*.xtc", + "-o": "trjcat_out.xtc", + "-cat": "", # need this to paste the trajectories back to back + } + + arguments = self._parse_arguments(flag_dict=flag_dict) + result = self._backend_executor.execute( + command=_GE.TRJCAT, arguments=arguments, location=tmp_dir, check=True + ) + for line in result.stdout.split("\n"): + self._logger_blank.log(line, _LE.DEBUG) + + rm_files = [ + f for f in os.listdir(tmp_dir) if f.endswith("xtc") and "trjcat" not in f + ] + for f in rm_files: + os.remove(os.path.join(tmp_dir, f)) + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/gromacs/trjconv.py b/icolos/core/workflow_steps/gromacs/trjconv.py new file mode 100644 index 0000000..bafa0ba --- /dev/null +++ b/icolos/core/workflow_steps/gromacs/trjconv.py @@ -0,0 +1,50 @@ +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from icolos.utils.enums.program_parameters import GromacsEnum +from icolos.core.workflow_steps.gromacs.base import StepGromacsBase +from icolos.utils.execute_external.gromacs import GromacsExecutor +from pydantic import BaseModel +from icolos.core.workflow_steps.step import _LE + +_GE = GromacsEnum() +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class StepGMXTrjconv(StepGromacsBase, BaseModel): + """ + Postprocessing step for gromacs trajectories + Mostly used for removing pbc, fitting trajectory etc. 
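+    The groups used for centring/output are supplied via the pipe_input entry in
+    settings.additional (an illustrative value would be "Protein System"), which
+    construct_pipe_arguments() turns into the stdin stream fed to gmx trjconv.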
+ """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=GromacsExecutor) + self._check_backend_availability() + + def execute(self): + + tmp_dir = self._make_tmpdir() + self._write_input_files(tmp_dir) + + xtc_file = self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_XTC) + flag_dict = { + "-s": self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TPR), + "-f": xtc_file, + "-o": xtc_file, + } + + arguments = self._parse_arguments(flag_dict=flag_dict) + result = self._backend_executor.execute( + command=_GE.TRJCONV, + arguments=arguments, + location=tmp_dir, + check=True, + pipe_input=self.construct_pipe_arguments( + tmp_dir, self.settings.additional[_SBE.PIPE_INPUT] + ), + ) + for line in result.stdout.split("\n"): + self._logger_blank.log(line, _LE.DEBUG) + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/io/__init__.py b/icolos/core/workflow_steps/io/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/io/base.py b/icolos/core/workflow_steps/io/base.py new file mode 100644 index 0000000..58e16dd --- /dev/null +++ b/icolos/core/workflow_steps/io/base.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.step_enums import StepBaseEnum + +_SBE = StepBaseEnum + + +class StepIOBase(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) diff --git a/icolos/core/workflow_steps/io/data_manipulation.py b/icolos/core/workflow_steps/io/data_manipulation.py new file mode 100644 index 0000000..01b3e69 --- /dev/null +++ b/icolos/core/workflow_steps/io/data_manipulation.py @@ -0,0 +1,248 @@ +from typing import List, Union +from pydantic import BaseModel + +from icolos.core.containers.compound import unroll_conformers +from icolos.core.step_utils.structcat_util import StructcatUtil +from icolos.core.step_utils.structconvert import StructConvert +from icolos.utils.enums.program_parameters import ( + OpenBabelEnum, + SchrodingerExecutablesEnum, +) +from icolos.utils.enums.step_enums import ( + StepDataManipulationEnum, + StepBaseEnum, + StepFilterEnum, +) +from icolos.core.workflow_steps.io.base import StepIOBase +import os +from icolos.core.workflow_steps.step import _LE +import numpy as np + +_SBE = StepBaseEnum +_SDM = StepDataManipulationEnum() +_SEE = SchrodingerExecutablesEnum() +_OE = OpenBabelEnum() +_SFE = StepFilterEnum() + + +class StepDataManipulation(StepIOBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + # extend parameters + if _SDM.ACTION not in self.settings.additional.keys(): + self.settings.additional[ + _SDM.ACTION + ] = _SDM.ACTION_ATTACH_CONFORMERS_AS_EXTRA + self._logger.log( + f"Action not specified, defaulting to {_SDM.ACTION_ATTACH_CONFORMERS_AS_EXTRA}.", + _LE.WARNING, + ) + + def _attach_conformers_as_extra(self): + # load data to match from previous step (note: no other input supported here, to avoid redudancy + # with standard input preparation) + match_compounds = ( + self.get_workflow_object() + .find_step_by_step_id(self.settings.additional[_SDM.MATCH_SOURCE]) + .clone_compounds() + ) + + # unroll for convenience, attach matches to input conformers as extra data + match_conformers = unroll_conformers(match_compounds) + for comp in self.get_compounds(): + for enum in comp: + for conf in enum: + list_matched = [ + c + for c in match_conformers + if conf.get_index_string() == 
c.get_index_string() + ] + conf.add_extra_data(key=_SDM.KEY_MATCHED, data=list_matched) + self._logger.log( + f"Added {len(list_matched)} conformers as extra data to conformer {conf.get_index_string()}.", + _LE.DEBUG, + ) + + def _convert_mae_to_pdb(self): + converter = StructConvert(prefix_execution=_SEE.SCHRODINGER_MODULE) + tmp_dir = self._make_tmpdir() + + # find the mae files from the input step and convert to pdb + for file in self.data.generic.get_files_by_extension("mae"): + file.write(tmp_dir) + output_file = file.get_file_name().split(".")[0] + ".pdb" + converter.mae2pdb( + os.path.join(tmp_dir, file.get_file_name()), + os.path.join(tmp_dir, output_file), + ) + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) + + def _assemble_complexes(self): + concatenator = StructcatUtil( + prefix_execution=_SEE.SCHRODINGER_MODULE, backend=_OE.OBABEL + ) + assert os.path.isfile(self.settings.additional[_SDM.RECEPTOR]) + # create a tmpdir to work in + tmp_dir = self._make_tmpdir() + # get compounds from previous step + conformers = self._unroll_compounds(self.get_compounds(), level="conformers") + for conf in conformers: + path = os.path.join(tmp_dir, f"{conf.get_index_string()}.sdf") + mol = conf.get_molecule() + conf.write(path) + concatenator.concatenate( + input_files=[ + self.settings.additional[_SDM.RECEPTOR], + path, + ], + output_file=os.path.join(tmp_dir, f"{conf.get_index_string()}.pdb"), + ) + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) + + def _filter_compounds(self): + """modifies set of input compounds according to the specification provided in the config block""" + # TODO: support ranking structures based on generic data + + top_n = self.settings.additional[_SFE.RETURN_N] + reverse = self.settings.additional[_SFE.HIGHEST_IS_BEST] + criteria = ( + self.settings.additional[_SFE.CRITERIA] + if _SFE.CRITERIA in self.settings.additional.keys() + else None + ) + aggregation = ( + self.settings.additional[_SFE.AGGREGATION] + if _SFE.AGGREGATION in self.settings.additional.keys() + else "sum" + ) + + top_conformer_list = [] + for compound in self.data.compounds: + # filter by enumeration first - return a list of the top scoring conformers for that enumeration + # this is the normal running mode, as opposed to sorting by compound, regardless of the enumeration it came from + for enumeration in compound.get_enumerations(): + enumeration.sort_conformers( + by_tag=criteria, reverse=reverse, aggregation=aggregation + ) + top_confs = enumeration.get_conformers()[:top_n] + enumeration.clear_conformers() + enumeration.add_conformers(top_confs) + # replace that enumeration's conformers with the sorted list. + # if filtering at conformer level i.e. 
regardless of enumeration + if self.settings.additional[_SFE.FILTER_LEVEL] == _SFE.COMPOUNDS: + for conf in top_confs: + top_conformer_list.append(conf) + if self.settings.additional[_SFE.FILTER_LEVEL] == _SFE.COMPOUNDS: + # sort the top conformers from each enumeration and attach the top n conformers to their respective enumeration, get rid of the rest + # sorted_top_confs = sorted(top_conformer_list, + # key=lambda x: x.get_molecule().GetProp(self.settings.additional[_SFE.CRITERIA]), + # reverse=reverse)[:top_n] + # sort conformers + sorted_top_confs = self._sort_conformers( + conformers=top_conformer_list, + by_tag=criteria, + reverse=reverse, + aggregation=aggregation, + ) + for compound in self.data.compounds: + for enum in compound.get_enumerations(): + enum.clear_conformers() + for conf in sorted_top_confs: + enum = conf.get_enumeration_object() + enum.add_conformer(conf) + + def _sort_conformers( + self, + conformers, + by_tag: Union[str, List[str]], + reverse: bool = True, + aggregation="sum", + ): + if isinstance(by_tag, list) and len(by_tag) == 1: + by_tag = by_tag[0] + + if isinstance(by_tag, str): + # sorting according to a single tag + conformers = sorted( + conformers, + key=lambda x: float(x.get_molecule().GetProp(by_tag)), + reverse=reverse, + ) + return conformers + # self._conformers = conformers + # self.reset_conformer_ids() + elif isinstance(by_tag, list): + # need to normalise the values, calculate max and min of each tag for that series of conformers provided + # this would allow us to compare across a series, i.e. scoring and ranking the output of all conformers in an enumeration from Glide + def normalise_tag(value, tag): + all_tag_values = [ + float(conf.get_molecule().GetProp(tag)) for conf in conformers + ] + if len(all_tag_values) == 1: + return value + else: + + max_tag = np.max(all_tag_values) + min_tag = np.min(all_tag_values) + return (float(value) - min_tag) / (max_tag - min_tag) + + # if we specify multiple tags, aggregate according the the provided aggregation function + if aggregation == "sum": + # sort by the sum of the normalised tags, + conformers = sorted( + conformers, + key=lambda x: np.sum( + [ + float(normalise_tag(x.get_molecule().GetProp(i), i)) + for i in by_tag + ] + ), + reverse=reverse, + ) + return conformers + elif aggregation == "product": + conformers = sorted( + conformers, + key=lambda x: np.product( + [ + float(normalise_tag(x.get_molecule().GetProp(i), i)) + for i in by_tag + ] + ), + reverse=reverse, + ) + return conformers + else: + raise AttributeError( + "Only sum or product aggregation modes are currently supported - ABORT" + ) + + def execute(self): + if ( + self.settings.additional[_SDM.ACTION] + == _SDM.ACTION_ATTACH_CONFORMERS_AS_EXTRA + ): + self._attach_conformers_as_extra() + elif self.settings.additional[_SDM.ACTION] == _SDM.ACTION_NO_ACTION: + n_comp, n_enum, n_conf = self.get_compound_stats() + self._logger.log( + f'Data manipulation step type "no_action" for {n_comp} compounds with {n_enum} enumerations with {n_conf} conformers completed.', + _LE.INFO, + ) + elif self.settings.additional[_SDM.ACTION] == _SDM.CONVERT_MAE_TO_PDB: + self._convert_mae_to_pdb() + elif self.settings.additional[_SDM.ACTION] == _SDM.ASSEMBLE_COMPLEXES: + # take pose conformers (sd format) and concatenate with pdb file + self._assemble_complexes() + elif self.settings.additional[_SDM.ACTION] == _SDM.COLLECT_ITERATOR_RESULTS: + # average the results coming from all iterations of the step + raise NotImplementedError + elif 
self.settings.additional[_SDM.ACTION] == _SDM.FILTER: + self._filter_compounds() + else: + raise ValueError( + f'Action "{self.settings.additional[_SDM.ACTION]}" not supported.' + ) diff --git a/icolos/core/workflow_steps/io/embedder.py b/icolos/core/workflow_steps/io/embedder.py new file mode 100644 index 0000000..884c522 --- /dev/null +++ b/icolos/core/workflow_steps/io/embedder.py @@ -0,0 +1,136 @@ +from copy import deepcopy + +from pydantic import BaseModel +from rdkit import Chem, RDLogger +from rdkit.Chem import AllChem + +from icolos.core.containers.compound import Conformer +from icolos.utils.general.icolos_exceptions import StepFailed +from icolos.utils.enums.step_enums import StepEmbeddingEnum +from icolos.core.workflow_steps.io.base import StepIOBase + +from icolos.core.workflow_steps.step import _LE +from icolos.utils.general.convenience_functions import * +from icolos.utils.smiles import to_mol + +_SEE = StepEmbeddingEnum() + + +class StepEmbedding(StepIOBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + # extend parameters with defaults + if _SEE.EMBED_AS not in self.settings.additional.keys(): + self.settings.additional[_SEE.EMBED_AS] = _SEE.EMBED_AS_ENUMERATIONS + self._logger.log( + f'No embedding level specified, defaulting to "{_SEE.EMBED_AS_ENUMERATIONS}".', + _LE.INFO, + ) + + def _smile_to_molecule(self, smile: str) -> Chem.Mol: + mol = to_mol(smile) + if mol is None: + self._logger.log( + f"The smile {smile} could not be transformed into a molecule and will be skipped.", + _LE.WARNING, + ) + return mol + + def _embed_with_RDKit(self, smile: str, parameters: dict) -> Chem.Mol: + molecule = self._smile_to_molecule(smile) + + # deactivate logger to suppress "missing Hs messages" + RDLogger.DisableLog("rdApp.*") + embed_code = AllChem.EmbedMolecule( + molecule, randomSeed=42, useRandomCoords=True + ) + + status = 0 + if embed_code != -1: + status = AllChem.UFFOptimizeMolecule(molecule, maxIters=600) + if status == 1: + self._logger.log( + f"The 3D coordinate generation of molecule {smile} did not converge in time.", + _LE.WARNING, + ) + else: + self._logger.log( + f"Could not embed molecule {smile} - no 3D coordinates have been generated.", + _LE.WARNING, + ) + RDLogger.EnableLog("rdApp.*") + + # add hydrogens to the molecule (if specified) + if nested_get(parameters, [_SEE.RDKIT_PROTONATE], default=True): + molecule = Chem.AddHs(molecule, addCoords=True) + + if embed_code != -1 and status == 0: + return molecule + + def _get_embedding_method(self, parameters: dict) -> str: + method = nested_get(parameters, [_SEE.METHOD], default=None) + if method is None: + error = "Embedding method not set." 
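+            # no embedding method was supplied in the step parameters: log and abort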
+            self._logger.log(error, _LE.ERROR)
+            raise StepFailed(error)
+        return method.upper()
+
+    def _embed_molecule(self, smile: str, parameters: dict) -> Chem.Mol:
+        method = self._get_embedding_method(parameters)
+        if method == _SEE.METHOD_RDKIT:
+            return self._embed_with_RDKit(smile, parameters)
+        else:
+            self._logger.log(
+                f"Specified embedding method {method} not available.", _LE.ERROR
+            )
+
+    def execute(self):
+        # TODO: REFACTOR
+        parameters = deepcopy(self.settings.arguments.parameters)
+        embed_as = self.settings.additional[_SEE.EMBED_AS]
+        for compound in self.get_compounds():
+            if embed_as == _SEE.EMBED_AS_ENUMERATIONS:
+                for enumeration in compound.get_enumerations():
+                    enumeration.clear_molecule()
+                    enumeration.clear_conformers()
+                    molecule = self._embed_molecule(
+                        smile=enumeration.get_smile(), parameters=parameters
+                    )
+                    enumeration.set_molecule(molecule)
+                number_successful = len(
+                    [
+                        True
+                        for enum in compound.get_enumerations()
+                        if enum.get_molecule() is not None
+                    ]
+                )
+                self._logger.log(
+                    f"Embedding for compound {compound.get_index_string()} (name: {compound.get_name()}) completed ({number_successful} of {len(compound)} enumerations successful).",
+                    _LE.INFO,
+                )
+            elif embed_as == _SEE.EMBED_AS_CONFORMERS:
+                for enumeration in compound.get_enumerations():
+                    enumeration.clear_conformers()
+                    molecule = self._embed_molecule(
+                        smile=enumeration.get_smile(), parameters=parameters
+                    )
+                    conformer = Conformer(
+                        conformer=molecule, enumeration_object=enumeration
+                    )
+                    enumeration.add_conformer(conformer, auto_update=True)
+                number_successful = len(
+                    [
+                        True
+                        for enum in compound.get_enumerations()
+                        if enum[0].get_molecule() is not None
+                    ]
+                )
+                self._logger.log(
+                    f"Embedding for compound {compound.get_index_string()} (name: {compound.get_name()}) completed ({number_successful} of {len(compound)} enumerations successful).",
+                    _LE.INFO,
+                )
+            else:
+                raise ValueError(
+                    f'Value "{embed_as}" for parameter "embed_as" not supported.'
+                )
diff --git a/icolos/core/workflow_steps/io/initialize_compound.py b/icolos/core/workflow_steps/io/initialize_compound.py
new file mode 100644
index 0000000..36644cf
--- /dev/null
+++ b/icolos/core/workflow_steps/io/initialize_compound.py
@@ -0,0 +1,20 @@
+from pydantic import BaseModel
+
+from icolos.utils.general.icolos_exceptions import StepFailed
+from icolos.core.workflow_steps.io.base import StepIOBase
+from icolos.core.workflow_steps.step import _LE
+
+
+class StepInitializeCompound(StepIOBase, BaseModel):
+    def __init__(self, **data):
+        super().__init__(**data)
+
+    def execute(self):
+        if len(self.data.compounds) == 0:
+            raise StepFailed(
+                "Compound initialization step failed - no Compound objects generated."
+            )
+        self._logger.log(
+            f"Step {self.get_step_id()} initialized {len(self.get_compounds())} compounds.",
+            _LE.INFO,
+        )
diff --git a/icolos/core/workflow_steps/pmx/__init__.py b/icolos/core/workflow_steps/pmx/__init__.py
new file mode 100644
index 0000000..1e5e115
--- /dev/null
+++ b/icolos/core/workflow_steps/pmx/__init__.py
@@ -0,0 +1,14 @@
+from icolos.core.workflow_steps.pmx.atomMapping import StepPMXatomMapping
+from icolos.core.workflow_steps.pmx.doublebox import StepPMXdoublebox
+from icolos.core.workflow_steps.pmx.genlib import StepPMXgenlib
+from icolos.core.workflow_steps.pmx.gentop import StepPMXgentop
+from icolos.core.workflow_steps.pmx.ligandHybrid import StepPMXligandHybrid
+from icolos.core.workflow_steps.pmx.mutate import StepPMXmutate
+from icolos.core.workflow_steps.pmx.abfe import StepPMXabfe
+from icolos.core.workflow_steps.pmx.box_water_ions import StepPMXBoxWaterIons
+from icolos.core.workflow_steps.pmx.prepare_simulations import StepPMXPrepareSimulations
+from icolos.core.workflow_steps.pmx.prepare_transitions import StepPMXPrepareTransitions
+from icolos.core.workflow_steps.pmx.run_analysis import StepPMXRunAnalysis
+from icolos.core.workflow_steps.pmx.setup_workpath import StepPMXSetup
+from icolos.core.workflow_steps.pmx.run_simulations import StepPMXRunSimulations
+from icolos.core.workflow_steps.pmx.assemble_systems import StepPMXAssembleSystems
diff --git a/icolos/core/workflow_steps/pmx/abfe.py b/icolos/core/workflow_steps/pmx/abfe.py
new file mode 100644
index 0000000..1c78c9f
--- /dev/null
+++ b/icolos/core/workflow_steps/pmx/abfe.py
@@ -0,0 +1,149 @@
+from icolos.core.workflow_steps.pmx.base import StepPMXBase
+from pydantic import BaseModel
+import os
+from icolos.utils.enums.step_enums import StepGromacsEnum
+from icolos.utils.execute_external.gromacs import GromacsExecutor
+from icolos.utils.execute_external.pmx import PMXExecutor
+from icolos.utils.enums.program_parameters import (
+    GromacsEnum,
+    PMXEnum,
+    PMXAtomMappingEnum,
+)
+
+_PE = PMXEnum()
+_PAE = PMXAtomMappingEnum()
+_GE = GromacsEnum()
+_SGE = StepGromacsEnum()
+
+
+class StepPMXabfe(StepPMXBase, BaseModel):
+    """Setup files for an ABFE calculation."""
+
+    _gromacs_executor: GromacsExecutor = GromacsExecutor()
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        self._initialize_backend(PMXExecutor)
+        self._check_backend_availability()
+        self._gromacs_executor = GromacsExecutor(prefix_execution=_SGE.GROMACS_LOAD)
+
+    def _separate_protein_ligand(self):
+        # separate out protein and ligand lines from the written complex.pdb
+        with open(os.path.join(self.work_dir, "complex.pdb"), "r") as f:
+            lines = f.readlines()
+        protein_lines = []
+        ligand_lines = []
+        # TODO: tighten up the logic for identifying the ligand here
+        for line in lines:
+            if "ATOM" in line:
+                protein_lines.append(line)
+            elif "HETATM" in line and "HOH" not in line:
+                ligand_lines.append(line)
+
+        with open(os.path.join(self.work_dir, "protein.pdb"), "w") as f:
+            f.writelines(protein_lines)
+
+        with open(os.path.join(self.work_dir, "MOL.pdb"), "w") as f:
+            f.writelines(ligand_lines)
+
+        os.remove(os.path.join(self.work_dir, "complex.pdb"))
+
+    def execute(self):
+        """
+        Required inputs:
+            protein.top, protein.gro
+            ligand.itp, ligand.gro
+
+        Execution:
+        - separate protein and ligand from the complex
+        - run pdb2gmx on the protein -> generates protein.top, protein.gro
+        - run acpype on the ligand -> generates ligand.itp, ligand.gro
+        - run pmx abfe to set up the system, done!
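+
+        The resulting call is roughly equivalent to (file names as produced by the
+        steps above; defaults as listed in the help text at the end of this module):
+
+            pmx abfe -pt topol.top -lt MOL.itp -pc protein.gro -lc MOL_GMX.gro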
+ """ + # use the same single dir setup as for the rest of the pmx pipeline + + assert self.work_dir is not None and os.path.isdir(self.work_dir) + + complex_file = self.data.generic.get_argument_by_extension( + "pdb", rtn_file_object=True + ) + complex_file.write(os.path.join(self.work_dir, "complex.pdb"), join=False) + + self._separate_protein_ligand() + + # parametrise the ligand, generate the itp files, top and gro files for the ligand + self._parametrisation_pipeline( + self.work_dir, include_gro=True, include_top=True + ) + + # parametrise protein + self._parametrise_protein(protein="protein.pdb", path="", output="protein.gro") + + # run abfe + + args = { + "-pt": "topol.top", + "-lt": "MOL.itp", + "-pc": "protein.gro", + "-lc": "MOL_GMX.gro", + } + self._backend_executor.execute( + command=_PE.ABFE, + arguments=self.get_arguments(args), + location=self.work_dir, + check=True, + ) + + +help_string = """ +pmx abfe -h +usage: pmx [-h] [-pt protop] [-lt ligtop] [-pc procrd] [-lc ligcrd] [--build] + [--doublebox] [--longest_axis] [--keep_intra] [--lig_ids ] + [--pro_ids ] [--restr_switch_on] [--seed int] + +This scripts helps to setup an absolute binding free energy calculation. As a +minimal input, you need to provide a structure and topology file for both the +protein (or host) and ligand (or guest) molecule. The topology is setup so to +contain restraints as defined by Boresch et al. (2003) J Phys Chem B 107(35); +these include one distance, two angles, and three dihedrals between ligand and +protein. You can either provide explicitly the atoms to be included in the +restraints, or let the script choose them automatically. + +optional arguments: + -h, --help show this help message and exit + -pt protop Input topology file for the protein. Default is + "protein.top". + -lt ligtop Input topology file for the ligand. It is expected that + all params needed for the ligand are explicitly defined + in this file. Default is "ligand.itp". + -pc procrd Input structure file in PDB or GRO format for the + protein. Default is "protein.gro". + -lc ligcrd Input structure file in PDB or GRO format for the ligand. + Default is "ligand.gro". + --build Whether to build the system (editconf, solvate, genion) + with a standard setup once the input files (top, gro) are + ready. + --doublebox Whether to use the double-system single-box setup. This + is useful for charged ligands. Default is False. + --longest_axis Whether to just place structures along the longest axis, + rather then minimising the volume. This option is + relevant only when using --doublebox. Default is False. + --keep_intra Whether to keep the LJ intramolecular interactions when + the ligand is decoupled. This option is relevant only + when using --doublebox. Default is False. + --lig_ids Three atom indices. If provided, these will be used for + the protein-ligand restraints. Otherwise they are chosen + automatically. + --pro_ids Three atom indices. If provided, these will be used for + the protein-ligand restraints. Otherwise they are chosen + automatically. + --restr_switch_on Whether to switch the restraints on or off, where "on" + means no restraints in stateA, and "off" means no + restraints in state B. Default is True (switch on). + --seed int Random seed to use when picking atoms for the restraints. + The automated restraints selection is stochastic, so if + you want to have a reproducible behaviour, provide a + random seed. 
+""" diff --git a/icolos/core/workflow_steps/pmx/assemble_systems.py b/icolos/core/workflow_steps/pmx/assemble_systems.py new file mode 100644 index 0000000..f2e2a64 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/assemble_systems.py @@ -0,0 +1,53 @@ +from typing import Dict, List +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel +import os +from icolos.utils.enums.program_parameters import PMXAtomMappingEnum, PMXEnum +from icolos.utils.execute_external.pmx import PMXExecutor +from icolos.utils.general.parallelization import SubtaskContainer + +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class StepPMXAssembleSystems(StepPMXBase, BaseModel): + """ + Executes the assemble_systems.py script, edges are parallelized over available cores + + """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=PMXExecutor) + + def execute(self): + assert self.work_dir is not None and os.path.isdir(self.work_dir) + + # get edges from the perturbation map attached to the step + edges = self.get_edges() + + # enforce one edge per task list (results in multiple batches for large maps) + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(edges) + self._execute_pmx_step_parallel( + run_func=self._execute_command, step_id="pmx_setup" + ) + + def _execute_command(self, edges: List, q: Dict): + + args = { + "-edges": '"' + " ".join([e.get_edge_id() for e in edges]) + '"', + "-ligand_path": os.path.join(self.work_dir, _PAE.LIGAND_DIR), + "-workPath": self.work_dir, + } + result = self._backend_executor.execute( + command=_PE.ASSEMBLE_SYSTEMS, + arguments=self.get_arguments(defaults=args), + check=True, + location=self.work_dir, + ) + q[edges[0].get_edge_id()] = result.returncode diff --git a/icolos/core/workflow_steps/pmx/atomMapping.py b/icolos/core/workflow_steps/pmx/atomMapping.py new file mode 100644 index 0000000..7bf1f76 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/atomMapping.py @@ -0,0 +1,86 @@ +from typing import Dict, List +from icolos.core.containers.perturbation_map import Edge +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel +from icolos.utils.execute_external.pmx import PMXExecutor +import os +from icolos.utils.enums.program_parameters import PMXEnum, PMXAtomMappingEnum +from icolos.utils.general.parallelization import SubtaskContainer + +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class StepPMXatomMapping(StepPMXBase, BaseModel): + """Ligand alchemy: map atoms for morphing.""" + + def __init__(self, **data): + super().__init__(**data) + self._initialize_backend(executor=PMXExecutor) + + def _prepare_arguments(self, args, output_dir): + # prepare the final set of arguments as a list + prepared_args = [] + default_args = { + "-o1": f"{output_dir}/pairs1.dat", + "-o2": f"{output_dir}/pairs2.dat", + "-opdb1": f"{output_dir}/out_pdb1.pdb", + "-opdb2": f"{output_dir}/out_pdb2.pdb", + "-opdbm1": f"{output_dir}/out_pdbm1.pdb", + "-opdbm2": f"{output_dir}/out_pdbm2.pdb", + "-score": f"{output_dir}/score.dat", + "-log": f"{output_dir}/mapping.log", + } + for key, value in args.items(): + default_args[key] = value + + for key, value in default_args.items(): + prepared_args.append(key), + prepared_args.append(value) + return prepared_args + + def _execute_command(self, edges: List[Edge], q: Dict): + assert isinstance(edges, 
list) + edge = edges[0] + lig1 = edge.get_source_node_name() + lig2 = edge.get_destination_node_name() + # write them to the right dir as a pdb from the outset + arguments = { + "-i1": os.path.join( + self.work_dir, + _PAE.LIGAND_DIR, + lig1, + "MOL.pdb", + ), + "-i2": os.path.join( + self.work_dir, + _PAE.LIGAND_DIR, + lig2, + "MOL.pdb", + ), + } + output_dir = os.path.join(self.work_dir, edge.get_edge_id(), _PE.HYBRID_STR_TOP) + arguments = self._prepare_arguments(args=arguments, output_dir=output_dir) + + result = self._backend_executor.execute( + command=_PE.ATOMMAPPING, + arguments=arguments, + check=True, + location=self.work_dir, + ) + q[edge.get_edge_id()] = result.returncode + + def execute(self): + # check the workflow has been configured correctly to use a shared work_dir + assert self.work_dir is not None and os.path.isdir(self.work_dir) + + edges = self.get_edges() + # enforce single edge per job queue + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(edges) + self._execute_pmx_step_parallel( + run_func=self._execute_command, step_id="atomMapping" + ) diff --git a/icolos/core/workflow_steps/pmx/base.py b/icolos/core/workflow_steps/pmx/base.py new file mode 100644 index 0000000..7fe0f0c --- /dev/null +++ b/icolos/core/workflow_steps/pmx/base.py @@ -0,0 +1,255 @@ +from subprocess import CompletedProcess +from pydantic import BaseModel +from icolos.core.containers.perturbation_map import PerturbationMap +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.program_parameters import GromacsEnum +from icolos.utils.enums.step_enums import StepGromacsEnum +from icolos.utils.execute_external.execute import Executor +from icolos.utils.execute_external.pmx import PMXExecutor +import os +from icolos.utils.general.parallelization import Parallelizer +from icolos.core.workflow_steps.step import _LE +import shutil + +_GE = GromacsEnum() +_SGE = StepGromacsEnum() + + +class StepPMXBase(StepBase, BaseModel): + + _antechamber_executor: Executor = None + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=PMXExecutor) + self._check_backend_availability() + self._antechamber_executor = Executor(prefix_execution=_SGE.AMBERTOOLS_LOAD) + + def _parametrise_protein( + self, + protein: str = "protein.pdb", + path: str = "input/protein", + output="protein.pdb", + ): + # run pdb2gmx on the protein + pdb2gmx_args = [ + "-f", + os.path.join(self.work_dir, path, protein), + "-ignh", + "-water", + self.settings.additional["water"], + "-ff", + self.settings.additional["forcefield"], + "-o", + os.path.join(self.work_dir, path, output), + ] + self._gromacs_executor.execute( + command=_GE.PDB2GMX, + arguments=pdb2gmx_args, + check=True, + location=os.path.join(self.work_dir, path), + ) + + def _clean_pdb_structure(self, tmp_dir: str) -> None: + files = [file for file in os.listdir(tmp_dir) if file.endswith("pdb")] + for file in files: + cleaned_lines = [] + with open(os.path.join(tmp_dir, file), "r") as f: + lines = f.readlines() + for line in lines: + if "ATOM" in line or "HETATM" in line: + cleaned_lines.append(line) + with open(os.path.join(tmp_dir, file), "w") as f: + f.writelines(cleaned_lines) + + def _parametrisation_pipeline(self, tmp_dir, include_top=False, include_gro=False): + # main pipeline for producing GAFF parameters for a ligand + arguments_antechamber = [ + "-i", + "MOL.pdb", + "-o", + 
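+ # sketch of the equivalent antechamber CLI call assembled by this list + # (gas-phase charge method; flag values as given above and below): + # antechamber -i MOL.pdb -o MOL.mol2 -fi pdb -fo mol2 -c gas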
"MOL.mol2", + "-fi", + "pdb", + "-fo", + "mol2", + "-c", + "gas", + ] + self._logger.log( + f"Running antechamber on structure {tmp_dir.split('/')[-1]}", _LE.DEBUG + ) + self._antechamber_executor.execute( + command=_GE.ANTECHAMBER, + arguments=arguments_antechamber, + check=True, + location=tmp_dir, + ) + + arguments_acpype = [ + os.path.join(_GE.ACPYPE_PATH, _GE.ACPYPE_BINARY), + "-di", + "MOL.mol2", + "-c", + "gas", + ] + self._antechamber_executor.execute( + command=_GE.PYTHON, arguments=arguments_acpype, location=tmp_dir, check=True + ) + # search the output dir for the itp file + acpype_dir = [p for p in os.listdir(tmp_dir) if p.endswith(".acpype")][0] + itp_file = [ + f + for f in os.listdir(os.path.join(tmp_dir, acpype_dir)) + if f.endswith("GMX.itp") + ][0] + shutil.copyfile( + os.path.join(tmp_dir, acpype_dir, itp_file), + # standardized name must be enforced here to make argument + # parsing easier in subsequent pmx steps + os.path.join(tmp_dir, "MOL.itp"), + ) + # for abfe calculations we need the ligand_GMX.top + .gro files as well + if include_top: + top_file = [ + f + for f in os.listdir(os.path.join(tmp_dir, acpype_dir)) + if f.endswith("GMX.top") + ][0] + shutil.copyfile( + os.path.join(tmp_dir, acpype_dir, top_file), + os.path.join(tmp_dir, top_file), + ) + if include_gro: + gro_file = [ + f + for f in os.listdir(os.path.join(tmp_dir, acpype_dir)) + if f.endswith("GMX.gro") + ][0] + shutil.copyfile( + os.path.join(tmp_dir, acpype_dir, gro_file), + os.path.join(tmp_dir, gro_file), + ) + + def _execute_pmx_step_parallel(self, run_func, step_id: str): + """ + Instantiates Icolos's parallelizer object, + runs the step's execute method, + checks the reutrn codes i.e. will error if an edge fails + """ + parallelizer = Parallelizer(func=run_func, collect_rtn_codes=True) + n = 1 + while self._subtask_container.done() is False: + + next_batch = self._get_sublists( + get_first_n_lists=self._get_number_cores() + ) # return n lists of length max_sublist_length + _ = [sub.increment_tries() for element in next_batch for sub in element] + _ = [sub.set_status_failed() for element in next_batch for sub in element] + edges = self._prepare_edges(next_batch) + # to avoid simultaneous processes logging to the same file, pass the + self._logger.log( + f"Executing {step_id} for batch {n}, containing {len(edges)} * {len(edges[0])} edges", + _LE.INFO, + ) + + rtn_codes = parallelizer.execute_parallel( + edges=edges, + ) + assert len(rtn_codes) == len(next_batch) + for idx, sublist in enumerate(next_batch): + for task in sublist: # one edge per sublist + if rtn_codes[idx] == 0: + task.set_status_success() + else: + task.set_status_failed() + + n += 1 + + def get_arguments(self, defaults: dict = None) -> list: + """ + Construct pmx-specific arguments from the step defaults, + overridden by arguments specified in the config file + """ + arguments = [] + + # add flags + for flag in self.settings.arguments.flags: + arguments.append(flag) + + # flatten the dictionary into a list for command-line execution + for key in self.settings.arguments.parameters.keys(): + arguments.append(key) + arguments.append(self.settings.arguments.parameters[key]) + + # add defaults, if not already present + if defaults is not None: + for key, value in defaults.items(): + if key not in arguments: + arguments.append(key) + arguments.append(value) + return arguments + + def get_edges(self): + """ + Inspect the map object passed to the step and extract the edge info + """ + + return 
self.get_workflow_object().workflow_data.perturbation_map.edges + + def get_nodes(self): + """ + Return the nodes attached to the perturbation map + """ + return self.get_workflow_object().workflow_data.perturbation_map.nodes + + def _get_line_idx(self, data: list, id_str: str) -> int: + # utility to extract the index of the line containing a specific id string + line = [e for e in data if id_str in e] + assert len(line) == 1 + line = line[0] + return data.index(line) + + def _construct_perturbation_map(self, work_dir: str, replicas: int): + # construct the perturbation map and load in the log file + log_file = self.data.generic.get_argument_by_extension( + "log", rtn_file_object=True + ) + log_file.write(work_dir) + perturbation_map = PerturbationMap( + compounds=self.data.compounds, + protein=self.data.generic.get_argument_by_extension( + "pdb", rtn_file_object=True + ), + replicas=replicas, + ) + perturbation_map.parse_map_file( + os.path.join(self.work_dir, log_file.get_file_name()) + ) + + self._logger.log( + f"Initialised perturbation map with {len(perturbation_map.get_nodes())} nodes and {len(perturbation_map.get_edges())} edges", + _LE.INFO, + ) + self.get_workflow_object().set_perturbation_map(perturbation_map) + + def _prepare_edges(self, batch): + edges = [] + + for task in batch: + task_edges = [] + for element in task: # for now, only a single element + task_edges.append(element.data) + edges.append(task_edges) + return edges + + def _log_result(self, result: CompletedProcess): + for line in result.stderr.split("\n"): + self._logger_blank.log(line, _LE.DEBUG) diff --git a/icolos/core/workflow_steps/pmx/box_water_ions.py b/icolos/core/workflow_steps/pmx/box_water_ions.py new file mode 100644 index 0000000..2be2023 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/box_water_ions.py @@ -0,0 +1,58 @@ +from typing import Dict, List +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel +import os +from icolos.utils.enums.program_parameters import PMXAtomMappingEnum, PMXEnum +from icolos.core.workflow_steps.step import _LE +from icolos.utils.execute_external.pmx import PMXExecutor +from icolos.utils.general.parallelization import SubtaskContainer + +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class StepPMXBoxWaterIons(StepPMXBase, BaseModel): + """ + Takes the prepared structure files and prepares the system; + runs editconf, solvate, genion and grompp for each system + to be simulated + """ + + # Note all paths are relative to the workdir + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=PMXExecutor) + + def execute(self): + # run the wrapper script in pmx to prepare the systems + + edges = self.get_edges() + + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(edges) + self._execute_pmx_step_parallel( + run_func=self._execute_command, step_id="BoxWaterIons" + ) + + def _execute_command(self, edges: List, q: Dict): + + arguments = { + "-edges": '"' + " ".join([e.get_edge_id() for e in edges]) + '"', + "-ligandPath": os.path.join(self.work_dir, _PAE.LIGAND_DIR), + "-workPath": self.work_dir, + } + + result = self._backend_executor.execute( + command=_PE.BOX_WATER_IONS,
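+ # note: get_arguments() (StepPMXBase) appends these defaults only if the + # key is absent from the user config, so a hypothetical config entry such + # as {"-workPath": "/scratch/run1"} would take precedence over the default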
arguments=self.get_arguments(defaults=arguments), + check=True, + location=self.work_dir, + ) + + self._logger.log("End of BoxWaterIons output", _LE.DEBUG) + # collect returncodes from subprocess + q[edges[0].get_edge_id()] = result.returncode diff --git a/icolos/core/workflow_steps/pmx/doublebox.py b/icolos/core/workflow_steps/pmx/doublebox.py new file mode 100644 index 0000000..9653cc4 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/doublebox.py @@ -0,0 +1,33 @@ +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel + + +class StepPMXdoublebox(StepPMXBase, BaseModel): + """Place two input structures into a single box.""" + + def __init__(self, **data): + super().__init__(**data) + + def execute(self): + pass + + +help_string = """ +pmx doublebox -h +usage: pmx [-h] -f1 -f2 [-o] [-r] [-d] [--longest_axis] + +Places two structures into a single box. The box is a rectangular cuboid in +which the two structures are placed in such a way as to minimise the box +volume. You can use this script to help in the setup of a calculation using +the single-box double-system approach. + +optional arguments: + -h, --help show this help message and exit + -f1 First structure in PDB or GRO format. + -f2 Second structure in PDB or GRO format. + -o Name of output file. Default is "doublebox.pdb". + -r Distance between the two structures (nm). Default is 2.5 nm. + -d Distance to the box wall (nm). Default is 1.5 nm. + --longest_axis Whether to just place structures along the longest axis, + rather then minimising the volume. Default is False. +""" diff --git a/icolos/core/workflow_steps/pmx/genlib.py b/icolos/core/workflow_steps/pmx/genlib.py new file mode 100644 index 0000000..0646297 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/genlib.py @@ -0,0 +1,68 @@ +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel + + +class StepPMXgenlib(StepPMXBase, BaseModel): + """Generate pmx ff library.""" + + def __init__(self, **data): + super().__init__(**data) + + def execute(self): + pass + + +help_string = """ +pmx genlib -h +usage: pmx [-h] [-f1 ipdb1] [-f2 ipdb2] [-o1 opdb1] [-o2 opdb2] + [--ffpath ffpath] [--fatp fatp] [--fnb fnb] [--moltype moltype] + [--noalign] [--cbeta] [--noH2Heavy] [--log log] + +The script creates hybrid structure and topology database entries (mtp and rtp) +in order to generate a pmx alchemical force field library. + +The easiest way to generate the library is to call this script from within +the folder of the force field you are interested in. + +If two pdb files (aligned on the backbone) are provided, the hybrid pdb, mtp, +and rtp files are written to file. If no pdb input file is provided, +the script uses pregenerated residues in order to build hybrid pdb, mtp, and +rtp files for all possible residue pairs, thus preparing the whole pmx ff +library. + +In addition, atomtype (-fatp) and non-bonded parameter (-fnm) files for the +introduced dummy atoms are generated. By default, these point towards the +files already present in the forcefield. In this way, the additional parameters +for the dummies are appended to the existing ff file, rather than being +written to new files. + +optional arguments: + -h, --help show this help message and exit + -f1 ipdb1 First input PDB file. Default is none provided. + -f2 ipdb2 Second input PDB file. Default is none provided. + -o1 opdb1 First output PDB file. Default is none provided. + -o2 opdb2 Second output PDB file. Default is none provided. 
+ --ffpath ffpath Path to mutation forcefield. Default is current folder. + --fatp fatp Atom types (atp) file. If the file is + present, data is appended to it, otherwise a new + file is created. Default is "atomtypes.atp". + --fnb fnb Non-bonded (nb) types file. If the file is + present, data is appended to it, otherwise a new + file is created. Default is "ffnonbonded.itp". + --moltype moltype The type of molecule for which the library is + being built. Available options are "protein", "dna", + or "rna". Default is "protein". + --noalign Whether to align the sidechains of the two + input PDB files provided. Default it True; this flag + sets it to False. + --cbeta Whether to morph sidechain between the two + residues or to use dummy atoms to (de)couple the + whole sidechain. By default, sidechain atoms are + morphed so to minimise the size of the perturbation. + With this flag set, whole sidechains are (de)coupled + instead; i.e. all atoms after C-beta are not mapped + between the two residues. + --noH2Heavy Whether to allow hydrogen to/from heavy atoms + morphing. Default is True, this flag sets it to False. + --log log Logging level. Either "info" or "debug". Default is "info". +""" diff --git a/icolos/core/workflow_steps/pmx/gentop.py b/icolos/core/workflow_steps/pmx/gentop.py new file mode 100644 index 0000000..52f8d53 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/gentop.py @@ -0,0 +1,46 @@ +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel + + +class StepPMXgentop(StepPMXBase, BaseModel): + """Fill hybrid topology with B states.""" + + def __init__(self, **data): + super().__init__(**data) + + def execute(self): + pass + + +help_string = """ +pmx gentop -h +usage: pmx [-h] [-p topol] [-o outfile] [-ff ff] [--split] [--scale_mass] + [--scale_dih SCALE_DIH] [--norecursive] + +This script fills in the B state to a topology file (itp or top) according to +the hybrid residues present in the file. If you provide a top file with +include statemets, by default the script will run through the included itp +files too; this can turned off using the --norecursive flag. You need to use +this script after having mutated a structure file with pmx mutate, and after +having passed that mutated structure through pdb2gmx. + +optional arguments: + -h, --help show this help message and exit + -p topol Input topology file (itp or top). Default is + "topol.top" + -o outfile Output topology file. Default is "pmxtop.top" + -ff ff Force field to use. If -p is a top file, it is not + necessary to specify the forcefield, as it will be + determined automatically. If -p is an itp file, then + -ff is needed, and if not provided a list of available + ff will be shown. + --split Write separate topologies for the vdW and charge + transformations. + --scale_mass Scale the masses of morphing atoms so that dummies + have a mass of 1. + --scale_dih SCALE_DIH + Scale the dihedrals that have a dummy. + --norecursive Whether to fill the B states also for all itp files + included in the provided topology file. Default is + True. This flag sets it to False. 
+""" diff --git a/icolos/core/workflow_steps/pmx/ligandHybrid.py b/icolos/core/workflow_steps/pmx/ligandHybrid.py new file mode 100644 index 0000000..3719b41 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/ligandHybrid.py @@ -0,0 +1,121 @@ +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel +import os +from icolos.utils.enums.program_parameters import PMXEnum, PMXAtomMappingEnum +from icolos.core.workflow_steps.step import _LE +import numpy as np + +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class StepPMXligandHybrid(StepPMXBase, BaseModel): + """Ligand alchemy: hybrid structure/topology.""" + + def __init__(self, **data): + super().__init__(**data) + + def _execute_command(self, args): + self._backend_executor.execute( + command=_PE.LIGANDHYBRID, arguments=args, check=True, location=self.work_dir + ) + + def _prepare_arguments(self, args, output_dir): + """ + Prepare the final set of arguments as a list, config overrides defaults + """ + prepared_args = [] + default_args = { + "-pairs": f"{output_dir}/pairs1.dat", + "-oA": f"{output_dir}/mergedA.pdb", + "-oB": f"{output_dir}/mergedB.pdb", + "-oitp": f"{output_dir}/merged.itp", + "-offitp": f"{output_dir}/ffmerged.itp", + "-log": f"{output_dir}/mapping.log", + } + for key, value in args.items(): + default_args[key] = value + + for key, value in self.settings.arguments.parameters.items(): + default_args[key] = value + + for key, value in default_args.items(): + prepared_args.append(key), + prepared_args.append(value) + + for flag in self.settings.arguments.flags: + prepared_args.append(flag) + return prepared_args + + def execute(self): + assert self.work_dir is not None and os.path.isdir(self.work_dir) + + edges = self.get_edges() + total_edges = len(edges) + for idx, edge in enumerate(edges): + progress = np.round(idx / total_edges * 100, 2) + self._logger.log( + f"Executing pmx ligandHybrid for edge {edge.get_edge_id()} - {progress}% complete", + _LE.DEBUG, + ) + lig1 = edge.get_source_node_name() + lig2 = edge.get_destination_node_name() + + arguments = { + "-i1": os.path.join( + self.work_dir, + _PAE.LIGAND_DIR, + lig1, + "MOL.pdb", + ), + "-i2": os.path.join( + self.work_dir, + _PAE.LIGAND_DIR, + lig2, + "MOL.pdb", + ), + "-itp1": os.path.join(self.work_dir, _PAE.LIGAND_DIR, lig1, "MOL.itp"), + "-itp2": os.path.join(self.work_dir, _PAE.LIGAND_DIR, lig2, "MOL.itp"), + } + # write output files the hybrodStrTop directory for each edge + output_dir = os.path.join( + self.work_dir, edge.get_edge_id(), _PE.HYBRID_STR_TOP + ) + arguments = self._prepare_arguments(args=arguments, output_dir=output_dir) + + self._execute_command(arguments) + + +help_string = """ +pmx ligandHybrid -h +usage: pmx [-h] [-i1 lig1.pdb] [-i2 lig2.pdb] [-itp1 lig1.itp] + [-itp2 lig2.itp] [-pairs pairs.dat] [-n1 scaffold1.ndx] + [-n2 scaffold2.ndx] [-oA mergedA.pdb] [-oB mergedB.pdb] + [-oitp merged.itp] [-offitp ffmerged.itp] [-log hybrid.log] + [--d 0.05] [--fit] [--split] [--scDUMm 1.0] [--scDUMa 1.0] + [--scDUMd 1.0] [--deAng] + +Provided two structures and topologies, build hybrid structure/topology. + +optional arguments: + -h, --help show this help message and exit + -i1 lig1.pdb Input ligand structure 1. Default is "lig1.pdb" + -i2 lig2.pdb Input ligand structure 2. Default is "lig2.pdb" + -itp1 lig1.itp Input ligand topology 1. Default is "lig1.itp" + -itp2 lig2.itp Input ligand topology 2. Default is "lig2.itp" + -pairs pairs.dat Optional input: atom pair mapping. 
+ -n1 scaffold1.ndx Optional input: index of atoms to consider for mol1 + -n2 scaffold2.ndx Optional input: index of atoms to consider for mol2 + -oA mergedA.pdb Output: hybrid structure based on the ligand 1. Default is "mergedA.pdb" + -oB mergedB.pdb Output: hybrid structure based on the ligand 2. Default is "mergedB.pdb" + -oitp merged.itp Output: hybrid topology. Default is "merged.itp" + -offitp ffmerged.itp Output: atomtypes for hybrid topology. Default is "ffmerged.itp" + -log hybrid.log Output: log file. Default is "hybrid.log" + --d 0.05 Optional: if -pairs not provided, distance (nm) between atoms to consider them morphable for alignment approach (default 0.05 nm). + --fit Fit mol2 onto mol1, only works if pairs.dat is provided + --split split the topology into separate transitions + --scDUMm 1.0 scale dummy masses using the counterpart atoms + --scDUMa 1.0 scale bonded dummy angle parameters + --scDUMd 1.0 scale bonded dummy dihedral parameters + --deAng decouple angles composed of 1 dummy and 2 non-dummies +""" diff --git a/icolos/core/workflow_steps/pmx/mutate.py b/icolos/core/workflow_steps/pmx/mutate.py new file mode 100644 index 0000000..d92bfbf --- /dev/null +++ b/icolos/core/workflow_steps/pmx/mutate.py @@ -0,0 +1,67 @@ +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel + + +class StepPMXmutate(StepPMXBase, BaseModel): + """Mutate protein or DNA/RNA.""" + + def __init__(self, **data): + super().__init__(**data) + + def execute(self): + pass + + +help_string = """ +pmx mutate -h +usage: pmx [-h] [-f infile] [-fB infileB] [-o outfile] [-ff ff] + [--script script] [--keep_resid | --ref ] [--resinfo] + +This script applies mutations of residues in a structure file for subsequent +free energy calculations. It supports mutations to protein, DNA, and RNA +molecules. + +The mutation information and dummy placements are taken from the hybrid residue +database "mutres.mtp". The best way to use this script is to take a pdb/gro file +that has been written with pdb2gmx with all hydrogen atoms present. + +By default, all residues are renumbered starting from 1, so to have unique +residue IDs. If you want to keep the original residue IDs, you can use the flag +--keep_resid. In this case, you will also need to provide chain information +in order to be able to mutate the desired residue. Alternatively, if you would +like to use the original residue IDs but these have been changed, e.g. by gromacs, +you can provide a reference PDB file (with chain information too) using the --ref +flag. The input structure will be mutated according to the IDs chosen for the +reference structure after having mapped the two residue indices. + +The program can either be executed interactively or via script. The script file +simply has to consist of "residue_id target_residue_name" pairs (just with some +space between the id and the name), or "chain_id residue_id target_residue_name" +if you are keeping the original residue IDs or providing a reference structure. + +The script uses an extended one-letter code for amino acids to account for +different protonation states. Use the --resinfo flag to print the dictionary. + +optional arguments: + -h, --help show this help message and exit + -f infile Input structure file in PDB or GRO format. Default is "protein.pdb" + -fB infileB Input structure file of the B state in PDB or GRO format (optional). + -o outfile Output structure file in PDB or GRO format. Default is "mutant.pdb" + -ff ff Force field to use. 
If none is provided, + a list of available ff will be shown. + --script script Text file with list of mutations (optional). + --keep_resid Whether to renumber all residues or to keep the + original residue IDs. By default, all residues are + renumbered so to have unique IDs. With this flags set, + the original IDs are kept. Because the IDs might not + be unique anymore, you will also be asked to choose + the chain ID where the residue you want to mutate is. + --ref Provide a reference PDB structure from which to map + the chain and residue IDs onto the file to be mutated (-f). + This can be useful when wanting to mutate a file that + has had its residues renumbered or the chain information + removed (e.g. after gmx grompp). As in the --keep_resid + option, if --ref is chosen, you will need to provide chain + information either interactively or via the --script flag. + --resinfo Show the list of 3-letter -> 1-letter residues +""" diff --git a/icolos/core/workflow_steps/pmx/prepare_simulations.py b/icolos/core/workflow_steps/pmx/prepare_simulations.py new file mode 100644 index 0000000..40b3320 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/prepare_simulations.py @@ -0,0 +1,51 @@ +from typing import Dict, List +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel +from icolos.utils.enums.program_parameters import PMXAtomMappingEnum, PMXEnum +from icolos.utils.execute_external.pmx import PMXExecutor +from icolos.utils.general.parallelization import SubtaskContainer + +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class StepPMXPrepareSimulations(StepPMXBase, BaseModel): + """ + Prepare the tpr file for either equilibration or production simulations + + Calls pmx util entrypoint prepare_simulations.py with + list of edges and the workdir path + """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=PMXExecutor) + + def execute(self): + + edges = self.get_edges() + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(edges) + self._execute_pmx_step_parallel( + run_func=self._execute_command, step_id="pmx_prepare_sims" + ) + + def _execute_command(self, edges: List, q: Dict): + arguments = { + "-edges": '"' + " ".join([e.get_edge_id() for e in edges]) + '"', + "-workPath": self.work_dir, + "-sim_type": self.settings.additional["sim_type"], + "-replicas": self.get_workflow_object().workflow_data.perturbation_map.replicas, + } + result = self._backend_executor.execute( + command=_PE.PREPARE_SIMULATIONS, + arguments=self.get_arguments(defaults=arguments), + check=True, + location=self.work_dir, + ) + + q[edges[0].get_edge_id()] = result.returncode diff --git a/icolos/core/workflow_steps/pmx/prepare_transitions.py b/icolos/core/workflow_steps/pmx/prepare_transitions.py new file mode 100644 index 0000000..ca36588 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/prepare_transitions.py @@ -0,0 +1,48 @@ +from icolos.utils.enums.logging_enums import LoggingConfigEnum +from typing import Dict, List +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel +from icolos.utils.enums.program_parameters import PMXAtomMappingEnum, PMXEnum +from icolos.utils.execute_external.pmx import PMXExecutor +from icolos.utils.general.parallelization import SubtaskContainer + +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() +_LE = LoggingConfigEnum() + + +class 
StepPMXPrepareTransitions(StepPMXBase, BaseModel): + """ + Executes the pmx prepare_transitions.py entrypoint + """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=PMXExecutor) + + def execute(self): + edges = self.get_edges() + + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(edges) + self._execute_pmx_step_parallel( + run_func=self._execute_command, step_id="pmx_prepare_transitions" + ) + + def _execute_command(self, edges: List, q: Dict): + args = { + "-edges": '"' + " ".join([e.get_edge_id() for e in edges]) + '"', + "-workPath": self.work_dir, + "-replicas": self.get_workflow_object().workflow_data.perturbation_map.replicas, + } + result = self._backend_executor.execute( + command=_PE.PREPARE_TRANSITIONS, + arguments=self.get_arguments(defaults=args), + check=True, + location=self.work_dir, + ) + q[edges[0].get_edge_id()] = result.returncode diff --git a/icolos/core/workflow_steps/pmx/run_analysis.py b/icolos/core/workflow_steps/pmx/run_analysis.py new file mode 100644 index 0000000..6609e2b --- /dev/null +++ b/icolos/core/workflow_steps/pmx/run_analysis.py @@ -0,0 +1,47 @@ +from typing import Dict, List +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel +from icolos.utils.enums.program_parameters import PMXAtomMappingEnum, PMXEnum +from icolos.utils.execute_external.pmx import PMXExecutor +from icolos.utils.general.parallelization import SubtaskContainer + +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class StepPMXRunAnalysis(StepPMXBase, BaseModel): + """ + Executes pmx run_analysis.py script + """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=PMXExecutor) + + def execute(self): + + edges = self.get_edges() + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(edges) + self._execute_pmx_step_parallel( + run_func=self._execute_command, step_id="pmx_run_analysis" + ) + + def _execute_command(self, edges: List, q: Dict): + + args = { + "-edges": '"' + " ".join([e.get_edge_id() for e in edges]) + '"', + "-workPath": self.work_dir, + "-replicas": self.get_workflow_object().workflow_data.perturbation_map.replicas, + } + result = self._backend_executor.execute( + command=_PE.RUN_ANALYSIS, + arguments=self.get_arguments(defaults=args), + check=True, + location=self.work_dir, + ) + q[edges[0].get_edge_id()] = result.returncode diff --git a/icolos/core/workflow_steps/pmx/run_simulations.py b/icolos/core/workflow_steps/pmx/run_simulations.py new file mode 100644 index 0000000..75613fb --- /dev/null +++ b/icolos/core/workflow_steps/pmx/run_simulations.py @@ -0,0 +1,58 @@ +from typing import Dict, List +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel +import numpy as np +from icolos.utils.enums.program_parameters import PMXAtomMappingEnum, PMXEnum +from icolos.utils.execute_external.pmx import PMXExecutor +from icolos.utils.general.parallelization import SubtaskContainer + +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class StepPMXRunSimulations(StepPMXBase, BaseModel): + """ + Calls pmx run_simulations entrypoint, handles parallel execution across multiple GPUs + """ + + def __init__(self, **data): + super().__init__(**data) 
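+ # batch-sizing sketch for execute() below: edges are spread evenly over + # the available cores/GPUs, e.g. 10 edges on 4 cores gives + # ceil(10 / 4) = 3 edges per sublist (illustrative numbers)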
+ + self._initialize_backend(executor=PMXExecutor) + + def execute(self): + + edges = self.get_edges() + # run everything through in one batch, with multiple edges per call + self.execution.parallelization.max_length_sublists = int( + np.ceil(len(edges) / self._get_number_cores()) + ) + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(edges) + self._execute_pmx_step_parallel( + run_func=self._execute_command, step_id="pmx_run_simulations" + ) + + def _execute_command(self, edges: List, q: Dict): + """ + Execute the simulations for a batch of edges, calling the pmx run_simulations entrypoint + """ + args = { + "-edges": '"' + " ".join([e.get_edge_id() for e in edges]) + '"', + "-workPath": self.work_dir, + "-sim_type": self.settings.additional["sim_type"], + "-replicas": self.get_workflow_object().workflow_data.perturbation_map.replicas, + } + for key, value in self.settings.arguments.parameters.items(): + args[key] = value + + result = self._backend_executor.execute( + command=_PE.RUN_SIMULATIONS, + arguments=self.get_arguments(defaults=args), + check=True, + location=self.work_dir, + ) + + q[edges[0].get_edge_id()] = result.returncode diff --git a/icolos/core/workflow_steps/pmx/setup_workpath.py b/icolos/core/workflow_steps/pmx/setup_workpath.py new file mode 100644 index 0000000..117c608 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/setup_workpath.py @@ -0,0 +1,192 @@ +from icolos.core.containers.perturbation_map import Node +import os +from typing import Dict +from pydantic import BaseModel +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from icolos.utils.enums.program_parameters import GromacsEnum +from icolos.utils.enums.step_enums import StepGromacsEnum +from icolos.utils.execute_external.execute import Executor +from icolos.utils.execute_external.gromacs import GromacsExecutor +from icolos.utils.general.parallelization import SubtaskContainer + +_GE = GromacsEnum() +_SGE = StepGromacsEnum() + + +class StepPMXSetup(StepPMXBase, BaseModel): + """ + Create the directory tree structure.
+ Requires the pmx workflow to be executed using the single_dir running mode. + Operates on the perturbation map object, runs acpype + on the written structures to produce the AMBER-compatible itp files + """ + + _gromacs_executor: GromacsExecutor = None + _antechamber_executor: Executor = None + + def __init__(self, **data): + super().__init__(**data) + self._gromacs_executor = GromacsExecutor(prefix_execution=_SGE.GROMACS_LOAD) + + def _separate_atomtypes(self, lig_path: str) -> None: + with open(os.path.join(lig_path, "MOL.itp"), "r") as f: + itp_lines = f.readlines() + + start_idx = self._get_line_idx(itp_lines, _GE.ATOMTYPES) + stop_index = self._get_line_idx(itp_lines, _GE.MOLECULETYPES) + + atomtype_lines = itp_lines[start_idx:stop_index] + cleaned_itp_lines = itp_lines[stop_index:] + with open(os.path.join(lig_path, "MOL.itp"), "w") as f: + f.writelines(cleaned_itp_lines) + + # process the atomtype lines to remove the bondtype column, + # which causes gmx to complain + cleaned_atomtype_lines = [] + for line in atomtype_lines: + parts = line.split() + if len(parts) > 5: + cleaned_parts = [parts[0]] + parts[2:] + ["\n"] + cleaned_atomtype_lines.append(" ".join(cleaned_parts)) + with open(os.path.join(lig_path, "ffMOL.itp"), "w") as f: + f.writelines(cleaned_atomtype_lines) + + def execute(self): + # sets the number of replicas to be used throughout the pmx run + replicas = ( + self.settings.additional["replicas"] + if "replicas" in self.settings.additional.keys() + else 3 + ) + assert self.work_dir is not None and os.path.isdir(self.work_dir) + self._construct_perturbation_map(self.work_dir, replicas) + # create the directory structure for subsequent calculations + edges = self.get_edges() + nodes = self.get_nodes() + + # create the input directory to sit at the top level of the workdir, contains ligands, + # mdp and protein topology files + os.makedirs(os.path.join(self.work_dir, "input"), exist_ok=True) + for folder in ["ligands", "mdp", "protein"]: + os.makedirs(os.path.join(self.work_dir, "input", folder), exist_ok=True) + + # handle protein parametrisation with pdb2gmx + protein = ( + self.get_workflow_object().workflow_data.perturbation_map.get_protein() + ) + protein.write(os.path.join(self.work_dir, "input/protein")) + + self._parametrise_protein(protein=protein.get_file_name(), path="input/protein") + + # remove the backup file + old_protein = [ + f + for f in os.listdir(os.path.join(self.work_dir, "input/protein")) + if f.endswith("#") + ] + # only want the parametrised processed pdb file in there + old_protein.append(protein.get_file_name()) + for f in old_protein: + os.remove(os.path.join(self.work_dir, "input/protein", f)) + + existing_itp_files = [ + f + for f in os.listdir(os.path.join(self.work_dir, "input/protein")) + if f.endswith("itp") and f.startswith("Protein") + ] + if ( + not existing_itp_files + ): # no protein itp files, we have a single chain that needs extracting from the top file + with open(os.path.join(self.work_dir, "input/protein/topol.top"), "r") as f: + top_lines = f.readlines() + + moltype_line = self._get_line_idx(top_lines, _GE.MOLECULETYPES) + + end_itp_line = self._get_line_idx(top_lines, "; Include water topology") + + moltype = top_lines[moltype_line + 2].split()[0] + cleaned_top = ( + top_lines[:moltype_line] + + [f'#include "topol_{moltype}.itp"\n'] + + top_lines[end_itp_line:] + ) + + itp_lines = top_lines[moltype_line:end_itp_line] + + with open(os.path.join(self.work_dir, "input/protein/topol.top"), "w") as f: + f.writelines(cleaned_top) + + with
open( + os.path.join(self.work_dir, f"input/protein/topol_{moltype}.itp"), "w" + ) as f: + f.writelines(itp_lines) + + mdp_dir = self.data.generic.get_argument_by_extension( + ext="mdp", rtn_file_object=True + ) + mdp_dir.write(os.path.join(self.work_dir, "input/mdp")) + + # parallelize the antechamber call across the pool of nodes + + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(nodes) + self._execute_pmx_step_parallel( + run_func=self._parametrise_nodes, step_id="pmx_setup" + ) + + # create the output folder structure + for edge in edges: + edgepath = os.path.join( + self.work_dir, + str(f"{edge.node_from.get_node_hash()}_{edge.node_to.get_node_hash()}"), + ) + hybridTopFolder = f"{edgepath}/hybridStrTop" + os.makedirs(hybridTopFolder, exist_ok=True) + + # water/protein + for wp in ["water", "protein"]: + wppath = f"{edgepath}/{wp}" + os.makedirs(wppath, exist_ok=True) + + # stateA/stateB + for state in ["stateA", "stateB"]: + statepath = f"{wppath}/{state}" + os.makedirs(statepath, exist_ok=True) + + # run1/run2/run3 + for r in range(1, replicas + 1): + runpath = f"{statepath}/run{r}" + os.makedirs(runpath, exist_ok=True) + + # em/eq/transitions + for sim in ["em", "eq", "transitions"]: + simpath = f"{runpath}/{sim}" + os.makedirs(simpath, exist_ok=True) + + # TODO: sort out nomenclature here + def _parametrise_nodes(self, edges: Node, q: Dict): + # because we use the base-class infrastructure to parallelize, arg names are awkward + # in this case, we parallelize over nodes, not edges! + if isinstance(edges, list): + node = edges[0] + else: + node = edges + lig_path = os.path.join(self.work_dir, "input", "ligands", node.get_node_hash()) + os.makedirs(lig_path, exist_ok=True) + node.conformer.write(os.path.join(lig_path, "MOL.pdb"), format_="pdb") + + # clean the written pdb, remove anything except hetatm/atom lines + self._clean_pdb_structure(lig_path) + # now run ACPYPE on the ligand to produce the topology file + self._parametrisation_pipeline(lig_path) + + # produces MOL.itp, need to separate the atomtypes directive out into ffMOL.itp for pmx + # to generate the forcefield later + self._separate_atomtypes(lig_path) + + # if we get through to here, return exit status 0 + + q[node.get_node_id()] = 0 diff --git a/icolos/core/workflow_steps/prediction/__init__.py b/icolos/core/workflow_steps/prediction/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/prediction/active_learning.py b/icolos/core/workflow_steps/prediction/active_learning.py new file mode 100644 index 0000000..0ff0621 --- /dev/null +++ b/icolos/core/workflow_steps/prediction/active_learning.py @@ -0,0 +1,267 @@ +from typing import List +import os +import random +import pickle + +from modAL.acquisition import max_EI +from modAL.models.learners import BayesianOptimizer +from pydantic.main import BaseModel + +from sklearn.gaussian_process.kernels import WhiteKernel, RBF +from sklearn.gaussian_process import GaussianProcessRegressor + +from icolos.core.containers.compound import Compound, Enumeration +from icolos.core.workflow_steps.step import StepBase +from icolos.core.workflow_steps.step import _LE +from icolos.utils.enums.step_enums import ( + StepBaseEnum, + StepGlideEnum, + StepActiveLearningEnum, +) + +from rdkit.Chem.AllChem import GetMorganFingerprintAsBitVect +from rdkit.Chem import PandasTools, Mol
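+ +# featurisation sketch used throughout this step: each molecule is encoded as +# a 2048-bit, radius-2 Morgan fingerprint, i.e. +# fp = np.array(GetMorganFingerprintAsBitVect(mol, 2, nBits=2048))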
+import pandas as pd +from pandas.core.frame import DataFrame +import numpy as np +from sklearn.metrics import mean_squared_error +from icolos.utils.enums.step_initialization_enum import StepInitializationEnum + +from icolos.utils.general.convenience_functions import nested_get + + +_SGE = StepGlideEnum() +_SALE = StepActiveLearningEnum() +_IE = StepInitializationEnum() + + +class StepActiveLearning(StepBase, BaseModel): + """ + Class to run an active learning framework + Primarily designed for building QSAR models using a physics-based method (embedding + docking) as an oracle + + Takes the step conf for the oracle as an additional argument. The step with these settings is run with the queried compounds at each stage of the active learning loop + """ + + def __init__(self, **data): + super().__init__(**data) + + def _initialize_oracle(self, compound_list: List[pd.Series]) -> List[StepBase]: + # list of step configs + base_oracle_config = self.settings.additional["oracle_config"] + oracle_steps = [] + for step in base_oracle_config: + oracle_steps.append(self._initialize_oracle_step_from_dict(step)) + + # manually attach the compound objects to the oracle's lead step + # subsequent steps should take their input from the first step. + for idx, compound in enumerate(compound_list): + cmp = Compound(compound_number=idx) + cmp.add_enumeration( + Enumeration( + compound_object=cmp, + smile=compound[_SALE.SMILES], + molecule=compound[_SALE.MOLECULE], + ) + ) + oracle_steps[0].data.compounds.append(cmp) + + return oracle_steps + + def query_oracle(self, compound_list: List[Mol]) -> List: + """ + Interface function to the oracle method; in the most likely case this is ligprep + docking + + Takes the requested compounds and runs them through the oracle workflow, returns the final compounds with annotations + + Notes: + This could be an arbitrarily complex workflow, but the only thing that's going to change is the compounds. + """ + # initialize the basic oracle, load the query compounds for evaluation + oracle_steps = self._initialize_oracle(compound_list) + # we have a fully initialized step with the compounds loaded.
Execute them + for idx, step in enumerate(oracle_steps): + # for subsequent steps we will need to read in from the previous one + if idx != 0: + step.generate_input() + step.execute() + step.process_write_out() + + # retrieve compounds from the final step + final_compounds = oracle_steps[-1].data.compounds + return final_compounds + + def _extract_final_scores( + self, compounds: List[Compound], criteria: str, highest_is_best: bool = False + ) -> List[float]: + """ + Takes a list of compound objects from the oracle and extracts the best score based on the provided criteria + """ + top_scores = [] + for comp in compounds: + scores = [] + for enum in comp.get_enumerations(): + for conf in enum.get_conformers(): + scores.append(float(conf._conformer.GetProp(criteria))) + + # if docking generated no conformers + # we probably want to filter these before the model sees them + if not scores: + scores.append(0.0) + + best_score = max(scores) if highest_is_best else min(scores) + top_scores.append(best_score) + + return top_scores + + def _generate_library(self) -> DataFrame: + """ + Loads the library file from disk + This should be a .sdf file with the pre-embedded compounds from a library enumeration or such + """ + lib_path = self.settings.additional[_SALE.VIRTUAL_LIB] + assert lib_path.endswith(".sdf") + + # hold the lib in a pandas df + library = PandasTools.LoadSDF( + lib_path, + smilesName=_SALE.SMILES, + molColName=_SALE.MOLECULE, + includeFingerprints=True, + removeHs=False, + embedProps=True, + ) + # need the morgan fingerprints in the df + library[_SALE.MORGAN_FP] = library.apply( + lambda x: np.array( + GetMorganFingerprintAsBitVect(x[_SALE.MOLECULE], 2, nBits=2048) + ), + axis=1, + ) + + return library + + def _prepare_initial_data(self, lib: pd.DataFrame): + initial_compound_idx = random.sample( + range(len(lib)), int(self.settings.additional[_SALE.INIT_SAMPLES]) + ) + data_rows = [lib.iloc[idx] for idx in initial_compound_idx] + # return annotated compound list + annotated_compounds = self.query_oracle(data_rows) + + # extract top score per compound + init_scores: List[float] = self._extract_final_scores( + annotated_compounds, criteria=_SGE.GLIDE_DOCKING_SCORE + ) + init_compounds = np.array([row[_SALE.MORGAN_FP] for row in data_rows]) + + return init_compounds, init_scores + + def _prepare_validation_data(self): + """ + Parses the sdf results file into a dataframe, extracts fingerprints + results + """ + val_lib = PandasTools.LoadSDF( + self.settings.additional[_SALE.VALIDATION_LIB], + smilesName=_SALE.SMILES, + molColName=_SALE.MOLECULE, + includeFingerprints=True, + removeHs=False, + embedProps=True, + ) + # need the morgan fingerprints in the df + val_lib[_SALE.MORGAN_FP] = val_lib.apply( + lambda x: np.array( + GetMorganFingerprintAsBitVect(x[_SALE.MOLECULE], 2, nBits=2048) + ), + axis=1, + ) + scores = list( + pd.to_numeric(val_lib[self.settings.additional[_SALE.CRITERIA]].fillna(0)) + ) + scores = [float(x) for x in scores] + return list(val_lib[_SALE.MORGAN_FP]), scores + + def _filter_oracle_results( + self, compound_rows: List[pd.Series], scores: List[float] + ): + final_compounds, final_scores = [], [] + for cmp, score in zip(compound_rows, scores): + if score != 0.0: + final_compounds.append(cmp) + final_scores.append(score) + + return final_compounds, final_scores + + def execute(self): + tmp_dir = self._make_tmpdir() + + # TODO: Implement committee model + + # start with sdf of pre-calculated ligand embeddings for each full peptide in the library + lib =
self._generate_library() + init_compounds, init_scores = self._prepare_initial_data(lib) + # load validation set for later + validation_compounds, validation_scores = self._prepare_validation_data() + + kernel = RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e3)) + WhiteKernel( + noise_level=1, noise_level_bounds=(1e-10, 1e2) + ) + learner = BayesianOptimizer( + # estimator=GaussianProcessRegressor(kernel=kernel), + estimator=GaussianProcessRegressor(kernel), + query_strategy=max_EI, + X_training=init_compounds, + y_training=init_scores, + ) + + for idx in range(int(self.settings.additional[_SALE.N_ROUNDS])): + # generate the requested points from the learner + query_idx, _ = learner.query( + list(lib[_SALE.MORGAN_FP]), + n_instances=int(self.settings.additional[_SALE.BATCH_SIZE]), + ) + # generate oracle input + query_compounds = [lib.iloc[int(idx)] for idx in query_idx] + # query oracle + + compounds = self.query_oracle(query_compounds) + scores = self._extract_final_scores( + compounds, self.settings.additional[_SALE.CRITERIA] + ) + # some of the scores will be zero if they didn't dock, do we want to filter these out, only hand back those compounds with a non-zero score? + query_compounds, scores = self._filter_oracle_results( + query_compounds, scores + ) + + learner.teach( + np.array([compound[_SALE.MORGAN_FP] for compound in query_compounds]), + scores, + ) + # need a held-out test set with docking scores already computed + performance = learner.score(validation_compounds, validation_scores) + self._logger.log( + f"Round {idx +1}; val set correlation: {performance}", _LE.INFO + ) + # get the predictions + predictions = learner.predict(validation_compounds) + mse = mean_squared_error(validation_scores, predictions) + self._logger.log(f"Round {idx+1}; rmse: {np.sqrt(mse)}", _LE.INFO) + + # pickle the final model + with open(os.path.join(tmp_dir, "model.pkl"), "wb") as f: + pickle.dump(learner, f) + + self._parse_output(tmp_dir) + + def _initialize_oracle_step_from_dict(self, step_conf: dict) -> StepBase: + # note this is a bit of a hack to get around a circular import, we can't use the main util + _STE = StepBaseEnum + step_type = nested_get(step_conf, _STE.STEP_TYPE, default=None) + step_type = None if step_type is None else step_type.upper() + if step_type in _IE.STEP_INIT_DICT.keys(): + return _IE.STEP_INIT_DICT[step_type](**step_conf) + else: + raise ValueError( + f"Backend for step {nested_get(step_conf, _STE.STEPID, '')} unknown." 
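+ # note: a minimal oracle step config is a dict in the usual Icolos step + # format, e.g. (hypothetical keys/values) + # {"step_id": "oracle_docking", "type": "glide", ...}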
+ ) diff --git a/icolos/core/workflow_steps/prediction/model_building.py b/icolos/core/workflow_steps/prediction/model_building.py new file mode 100644 index 0000000..8bd9161 --- /dev/null +++ b/icolos/core/workflow_steps/prediction/model_building.py @@ -0,0 +1,269 @@ +import json +import os +import numpy as np +import pandas as pd +from collections import OrderedDict +from copy import deepcopy +from typing import Tuple, List + +from pydantic import BaseModel + +from icolos.core.containers.compound import Conformer +from icolos.core.containers.generic import GenericData +from icolos.utils.enums.program_parameters import ModelBuilderEnum +from icolos.utils.enums.step_enums import StepModelBuilderEnum +from icolos.core.workflow_steps.io.base import StepIOBase +from icolos.core.workflow_steps.step import _LE, StepSettingsParameters +from icolos.utils.enums.write_out_enums import WriteOutEnum +from icolos.utils.execute_external.execute import Executor + +_SMBE = StepModelBuilderEnum() +_SME = ModelBuilderEnum() +_WE = WriteOutEnum() + + +class StepModelBuilder(StepIOBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + # initialize the executor + self._initialize_backend(executor=Executor) + + def _generate_temporary_input_output_files( + self, tmp_dir: str + ) -> Tuple[str, str, str, str, str]: + tmp_input_config_json = os.path.join(tmp_dir, _SMBE.TMP_INPUT_CONFIG) + tmp_input_data_csv = os.path.join(tmp_dir, _SMBE.TMP_INPUT_DATA) + tmp_output_best_model_pkl = os.path.join(tmp_dir, _SMBE.TMP_OUTPUT_BEST_MODEL) + tmp_output_best_parameters_json = os.path.join( + tmp_dir, _SMBE.TMP_OUTPUT_BEST_PARAMETERS + ) + tmp_output_production_pkl = os.path.join( + tmp_dir, _SMBE.TMP_OUTPUT_PRODUCTION_MODEL + ) + return ( + tmp_input_config_json, + tmp_input_data_csv, + tmp_output_best_model_pkl, + tmp_output_best_parameters_json, + tmp_output_production_pkl, + ) + + def _update_data_block( + self, conf: dict, tmp_input_data_csv: str, settings: StepSettingsParameters + ) -> dict: + # the user can specify additional things for the "data" block of the configuration + # in the "additional" field; the input CSV file needs to be overwritten in every case, though + specified_data_block = settings.additional.get(_SMBE.DATA, {}) + for key in specified_data_block.keys(): + conf[_SMBE.DATA][key] = specified_data_block[key] + conf[_SMBE.DATA][_SMBE.DATA_TRAININGSET_FILE] = tmp_input_data_csv + if _SMBE.DATA_TESTSET_FILE in conf[_SMBE.DATA].keys(): + conf[_SMBE.DATA].pop(_SMBE.DATA_TESTSET_FILE, None) + self._logger.log( + f"Removed test set specification, not supported yet.", _LE.WARNING + ) + return conf + + def _write_OptunaAZ_configuration( + self, + tmp_input_config_json: str, + tmp_input_data_csv: str, + settings: StepSettingsParameters, + ): + config_path = settings.arguments.parameters[_SME.CONFIG] + with open(config_path, "r") as file: + optunaaz_conf = file.read().replace("\r", "").replace("\n", "") + optunaaz_conf = json.loads(optunaaz_conf) + optunaaz_conf = self._update_data_block( + optunaaz_conf, tmp_input_data_csv, settings + ) + with open(tmp_input_config_json, "w") as file: + json.dump(optunaaz_conf, fp=file, indent=4) + self._logger.log( + f"Wrote updated OptunaAZ configuration file to {tmp_input_config_json}.", + _LE.DEBUG, + ) + + def _write_input_csv( + self, + conformers: List[Conformer], + tmp_input_data_csv: str, + settings: StepSettingsParameters, + ): + def _get_tag(conformer: Conformer, tag: str) -> str: + try: + value = conformer.get_molecule().GetProp(tag).strip() 
+ except KeyError: + value = np.nan + return value + + smiles_column = settings.additional[_SMBE.DATA][_SMBE.DATA_INPUT_COLUMN] + response_column = settings.additional[_SMBE.DATA][_SMBE.DATA_RESPONSE_COLUMN] + + # initialize the dictionary + dict_result = OrderedDict() + dict_result[_WE.RDKIT_NAME] = ["" for _ in range(len(conformers))] + dict_result[smiles_column] = ["" for _ in range(len(conformers))] + dict_result[response_column] = ["" for _ in range(len(conformers))] + + # populate the dictionary with the values + for irow in range(len(conformers)): + conf = conformers[irow] + dict_result[_WE.RDKIT_NAME][irow] = conf.get_index_string() + dict_result[smiles_column][irow] = _get_tag(conf, smiles_column) + dict_result[response_column][irow] = _get_tag(conf, response_column) + + # do the writeout (after sanitation) + df_result = pd.DataFrame.from_dict(dict_result) + df_result.to_csv( + path_or_buf=tmp_input_data_csv, + sep=",", + na_rep="", + header=True, + index=False, + mode="w", + quoting=None, + ) + + def _get_arguments( + self, + tmp_input_config_json: str, + tmp_output_best_model_pkl: str, + tmp_output_best_parameters_json: str, + tmp_output_production_pkl: str, + ) -> List[str]: + arguments = [ + _SME.CONFIG, + tmp_input_config_json, + _SME.MERGED_MODEL_OUTPATH, + tmp_output_production_pkl, + _SME.BEST_MODEL_OUTPATH, + tmp_output_best_model_pkl, + _SME.BEST_BUILDCONFIG_OUTPATH, + tmp_output_best_parameters_json, + ] + return arguments + + def _parse_output( + self, + tmp_input_config_json: str, + tmp_input_data_csv: str, + tmp_output_best_parameters_json: str, + tmp_output_production_pkl: str, + ): + # loading the final model is crucial (and the end-artifact for this step) + try: + with open(tmp_output_production_pkl, "rb") as f: + data = f.read() + self.data.generic.add_file( + GenericData( + file_name=_SMBE.TMP_OUTPUT_PRODUCTION_MODEL, file_data=data + ) + ) + except FileNotFoundError as e: + self._logger.log( + f"Could not load production model from path {tmp_output_production_pkl}.", + _LE.ERROR, + ) + raise e + + # loading the JSON with the best hyper-parameter configuration + try: + with open(tmp_output_best_parameters_json, "r") as f: + data = f.read().replace("\r", "").replace("\n", "") + data = json.loads(data) + self.data.generic.add_file( + GenericData( + file_name=_SMBE.TMP_OUTPUT_BEST_PARAMETERS, file_data=data + ) + ) + except FileNotFoundError as e: + self._logger.log( + f"Could not load best hyper-parameter configuration from path {tmp_output_best_parameters_json}.", + _LE.WARNING, + ) + + # loading the input JSON for OptunaAZ + try: + with open(tmp_input_config_json, "r") as f: + data = f.read() + self.data.generic.add_file( + GenericData(file_name=_SMBE.TMP_INPUT_CONFIG, file_data=data) + ) + except FileNotFoundError as e: + self._logger.log( + f"Could not load OptunaAZ input configuration from path {tmp_input_config_json}.", + _LE.WARNING, + ) + + # loading the input CSV + try: + with open(tmp_input_data_csv, "r") as f: + data = f.read() + self.data.generic.add_file( + GenericData(file_name=_SMBE.TMP_INPUT_DATA, file_data=data) + ) + except FileNotFoundError as e: + self._logger.log( + f"Could not load input CSV file from path {tmp_input_data_csv}.", + _LE.WARNING, + ) + + def execute(self): + # make a copy of the settings to avoid side-effects with the dictionaries + settings = deepcopy(self.settings) + + # generate temporary files + tmp_dir = self._move_to_temp_dir() + ( + tmp_input_config_json, + tmp_input_data_csv, + tmp_output_best_model_pkl,
tmp_output_best_parameters_json, + tmp_output_production_pkl, + ) = self._generate_temporary_input_output_files(tmp_dir) + + # write OptunaAZ configuration to file + self._write_OptunaAZ_configuration( + tmp_input_config_json=tmp_input_config_json, + tmp_input_data_csv=tmp_input_data_csv, + settings=settings, + ) + + # unroll all conformers + all_conformers = [] + for compound in self.get_compounds(): + for enumeration in compound: + all_conformers = all_conformers + enumeration.get_conformers() + + # write input CSV, derived from the conformers + self._write_input_csv( + conformers=all_conformers, + tmp_input_data_csv=tmp_input_data_csv, + settings=settings, + ) + + # execute OptunaAZ + self._backend_executor.execute( + command=_SME.OPTBUILD_ENTRY_POINT, + arguments=self._get_arguments( + tmp_input_config_json=tmp_input_config_json, + tmp_output_best_model_pkl=tmp_output_best_model_pkl, + tmp_output_best_parameters_json=tmp_output_best_parameters_json, + tmp_output_production_pkl=tmp_output_production_pkl, + ), + check=False, + ) + + # parse the output + self._parse_output( + tmp_input_config_json=tmp_input_config_json, + tmp_input_data_csv=tmp_input_data_csv, + tmp_output_best_parameters_json=tmp_output_best_parameters_json, + tmp_output_production_pkl=tmp_output_production_pkl, + ) + + # clean-up + self._restore_working_dir() + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/prediction/predictor.py b/icolos/core/workflow_steps/prediction/predictor.py new file mode 100644 index 0000000..0a80043 --- /dev/null +++ b/icolos/core/workflow_steps/prediction/predictor.py @@ -0,0 +1,86 @@ +import pickle +from copy import deepcopy + +import numpy as np +from typing import List + +from pydantic import BaseModel +from rdkit import Chem + +from icolos.utils.general.icolos_exceptions import StepFailed, get_exception_message +from icolos.utils.enums.step_enums import StepPredictorEnum +from icolos.core.workflow_steps.io.base import StepIOBase +from icolos.core.workflow_steps.step import _LE + +from icolos.utils.general.convenience_functions import * + +_SPE = StepPredictorEnum() + + +class StepPredictor(StepIOBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + @classmethod + def _load_scikit_model(cls, model_path: str): + with open(model_path, "rb") as f: + scikit_model = pickle.load(f) + return scikit_model + + def _get_feature_values( + self, conformer: Chem.Mol, feature_names: List[str] + ) -> np.ndarray: + list_values = [] + for feature in feature_names: + try: + list_values.append(float(conformer.GetProp(feature))) + except KeyError as e: + self._logger.log( + f"Could not find feature / property, error message: {get_exception_message(e)}", + _LE.ERROR, + ) + raise e + + # cast list to 2D array + return np.array([list_values]) + + def execute(self): + # get parameters + parameters = deepcopy(self.settings.additional) + model_path = nested_get(parameters, _SPE.MODEL_PATH, default=None) + feature_names = nested_get(parameters, _SPE.FEATURES, default=None) + name_predicted = nested_get( + parameters, _SPE.NAME_PREDICTED, default=_SPE.NAME_PREDICTED_DEFAULT + ) + + # check parameters; model_path and features are mandatory + if model_path is None or feature_names is None: + message = f"Parameters {_SPE.MODEL_PATH} (path to model) and {_SPE.FEATURES} (list with features) have to be set - abort." 
+ self._logger.log(message, _LE.ERROR) + raise StepFailed(message) + if name_predicted == _SPE.NAME_PREDICTED_DEFAULT: + self._logger.log( + f"Name of predicted property not specified, using default value {_SPE.NAME_PREDICTED_DEFAULT} instead (not recommended).", + _LE.WARNING, + ) + + # load model from file and predict endpoint + model = self._load_scikit_model(model_path=model_path) + predicted = 0 + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + for conformer in enumeration.get_conformers(): + if not self._input_object_valid(conformer): + continue + + f_values = self._get_feature_values( + conformer=conformer.get_molecule(), feature_names=feature_names + ) + conformer.get_molecule().SetProp( + name_predicted, str(model.predict(X=f_values)[0]) + ) + predicted += 1 + self._logger.log( + f"Predicted {name_predicted} for {predicted} conformers in {len(self.get_compounds())} compounds.", + _LE.INFO, + ) diff --git a/icolos/core/workflow_steps/schrodinger/__init__.py b/icolos/core/workflow_steps/schrodinger/__init__.py new file mode 100644 index 0000000..010d3cc --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/__init__.py @@ -0,0 +1,9 @@ +from icolos.core.workflow_steps.schrodinger.prime import StepPrime +from icolos.core.workflow_steps.schrodinger.macromodel import StepMacromodel +from icolos.core.workflow_steps.schrodinger.ligprep import StepLigprep +from icolos.core.workflow_steps.schrodinger.glide import StepGlide +from icolos.core.workflow_steps.schrodinger.prepwizard import StepPrepwizard +from icolos.core.workflow_steps.schrodinger.fep_plus_setup import StepFepPlusSetup +from icolos.core.workflow_steps.schrodinger.fep_plus_execution import StepFepPlusExec +from icolos.core.workflow_steps.schrodinger.desmond_preprocessor import StepDesmondSetup +from icolos.core.workflow_steps.schrodinger.desmond_exec import StepDesmondExec diff --git a/icolos/core/workflow_steps/schrodinger/base.py b/icolos/core/workflow_steps/schrodinger/base.py new file mode 100644 index 0000000..7a1ba8f --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/base.py @@ -0,0 +1,366 @@ +import os +from typing import Optional, Iterable, Union +from pydantic import BaseModel +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum +from icolos.utils.execute_external.license_token_guard import ( + TokenGuardParameters, + SchrodingerLicenseTokenGuard, +) +from icolos.utils.general.files_paths import attach_root_path +from icolos.utils.execute_external.schrodinger import SchrodingerExecutor +from icolos.utils.enums.step_enums import StepDesmondEnum +from icolos.core.workflow_steps.step import _LE +import re +from shutil import copy +from typing import Dict + + +_EE = SchrodingerExecutablesEnum() +_SDE = StepDesmondEnum() + + +class StepSchrodingerBase(StepBase, BaseModel): + + token_guard: Optional[TokenGuardParameters] = None + + def __init__(self, **data): + super().__init__(**data) + + def _apply_token_guard(self): + if self.token_guard is not None: + token_guard = SchrodingerLicenseTokenGuard(token_guard=self.token_guard) + token_guard.guard() + + # TODO: Deprecated - use self.converter + def _translate_SDF_to_MAE( + self, sdf_path: str, mae_path: str, executor: SchrodingerExecutor + ): + """As "Glide" is only able to read MAE (Maestro) files, write the ligands out in that format.""" + + # call "sdconvert" from Schrodinger's software + arguments = [ + "".join([_EE.SDCONVERT_I, 
_EE.SDCONVERT_FORMAT_SD]),
+            sdf_path,
+            "".join([_EE.SDCONVERT_O, _EE.SDCONVERT_FORMAT_MAE]),
+            mae_path,
+        ]
+        execution_result = executor.execute(
+            command=_EE.SDCONVERT, arguments=arguments, check=True
+        )
+
+    def _translate_MAE_to_SDF(
+        self, mae_path: str, sdf_path: str, executor: SchrodingerExecutor
+    ):
+        """In cases where the write-out mode for Glide is not producing SDF files."""
+
+        # call "sdconvert" from Schrodinger's software
+        arguments = [
+            "".join([_EE.SDCONVERT_I, _EE.SDCONVERT_FORMAT_MAE]),
+            mae_path,
+            "".join([_EE.SDCONVERT_O, _EE.SDCONVERT_FORMAT_SD]),
+            sdf_path,
+        ]
+        execution_result = executor.execute(
+            command=_EE.SDCONVERT, arguments=arguments, check=True
+        )
+
+    def _translate_PDB_to_MAE(
+        self, pdb_path: str, mae_path: str, executor: SchrodingerExecutor
+    ):
+        """Translate a PDB structure (e.g. a receptor) into MAE format."""
+
+        # call "structconvert" from Schrodinger's software
+        arguments = [
+            "".join([_EE.SDCONVERT_I, _EE.STRUCTCAT_FORMAT_PDB]),
+            pdb_path,
+            "".join([_EE.SDCONVERT_O, _EE.SDCONVERT_FORMAT_MAE]),
+            mae_path,
+        ]
+        execution_result = executor.execute(
+            command=_EE.STRUCTCONVERT, arguments=arguments, check=True
+        )
+
+    def _replace_config_value(self, key, value, config):
+        value = str(value)
+        pattern = fr"({key} =).*"
+        pattern = re.compile(pattern)
+        config = re.sub(pattern, fr"\1 {value}", config)
+        return config
+
+    def _get_template(self, file_name):
+        file = [
+            file
+            for file in os.listdir(attach_root_path("icolos/config/desmond"))
+            if file_name in file
+        ]
+        assert len(file) == 1
+        return file[0]
+
+    def _write_config(self, tmp_dir, dict_: Dict, file_name):
+        # see if a config file was specified; if so, assume no further changes:
+        if _SDE.CONFIG in dict_.keys() and dict_[_SDE.CONFIG] is not None:
+            copy(dict_[_SDE.CONFIG], tmp_dir)
+        else:
+            template = self._get_template(file_name)
+            with open(attach_root_path(f"icolos/config/desmond/{template}"), "r") as f:
+                config = f.read()
+            for k, v in dict_.items():
+                config = self._replace_config_value(k, v, config)
+
+            self._logger.log(f"Compiled file {file_name}...", _LE.DEBUG)
+            for line in config.split("\n"):
+                self._logger_blank.log(line, _LE.DEBUG)
+            with open(os.path.join(tmp_dir, file_name), "w") as f:
+                f.write(config)
+
+    def _parse_arguments(self, defaults):
+        args = []
+
+        for flag in self.settings.arguments.flags:
+            args.append(flag)
+        if "-WAIT" not in args:
+            args.append("-WAIT")
+        for k, v in self.settings.arguments.parameters.items():
+            args.append(k)
+            args.append(v)
+        for k, v in defaults.items():
+            if k not in args:
+                args.append(k)
+                args.append(v)
+        return args
+
+    @staticmethod
+    def _parse_maestro_in_file(
+        lines: Iterable[str],
+    ) -> Dict[str, Union[str, Dict[str, str]]]:
+        """Parses Maestro input, and returns a keywords dict for it."""
+
+        separator3 = "   "  # three spaces separate keyword and value
+        indent4 = "    "  # four spaces mark lines belonging to a block
+        block_starters = {
+            "[CONSTRAINT_GROUP",
+            "[FEATURE",
+        }
+
+        # All Glide keywords. Get all keywords with:
+        # $ module load schrodinger
+        # $ glide -docking-keywords | cut -d' ' -f1 | sed 's/.*/"&"/' | paste -sd , -
+        # List keywords, get first word, wrap in quotes, join lines.
+ # See: + # - https://stackoverflow.com/a/19145499 + # - https://unix.stackexchange.com/a/251362 + allowed_keywords = { + "AMIDE_MODE", + "AMIDE_TRANS_ALL", + "AMIDE_TRANSTOL", + "ASL_RES_INTERACTION", + "CALC_INPUT_RMS", + "CANONICALIZE", + "COMPRESS_POSES", + "CORE_ATOMS", + "CORE_DEFINITION", + "CORE_FILTER", + "CORE_POS_MAX_RMSD", + "CORE_RESTRAIN", + "CORE_RESTRAIN_V", + "CORE_SMARTS", + "CORE_SNAP", + "CORECONS_FALLBACK", + "CSV_PROPS_FILE", + "CV_CUTOFF", + "DIELMOD", + "DOCKING_METHOD", + "DOINTRA", + "DOINTRA_SCALE", + "DSCORE_CUTOFF", + "EPIK_PENALTIES", + "EXPANDED_SAMPLING", + "FITDEN", + "FORCEFIELD", + "FORCEPLANAR", + "GLIDE_CONFGEN_BADDIST2", + "GLIDE_CONFGEN_EFCUT", + "GLIDE_CONS_FEAT_FILE", + "GLIDE_CONS_FINALONLY", + "GLIDE_CONS_RMETCOORD", + "GLIDE_CONS_RNOEMAX", + "GLIDE_CONS_RNOEMIN", + "GLIDE_CONS_RPOS", + "GLIDE_CONS_XMETCOORD", + "GLIDE_CONS_XNOE", + "GLIDE_CONS_XPOS", + "GLIDE_CONS_YMETCOORD", + "GLIDE_CONS_YNOE", + "GLIDE_CONS_YPOS", + "GLIDE_CONS_ZMETCOORD", + "GLIDE_CONS_ZNOE", + "GLIDE_CONS_ZPOS", + "GLIDE_DIELCO", + "GLIDE_ELEMENTS", + "GLIDE_EXVOL_PENAL_NUM", + "GLIDE_EXVOL_PENAL_STRENGTH", + "GLIDE_NTOTALCONS", + "GLIDE_NUMEXVOL", + "GLIDE_NUMMETCOORDCONS", + "GLIDE_NUMMETCOORDSITES", + "GLIDE_NUMNOECONS", + "GLIDE_NUMPOSITCONS", + "GLIDE_NUMUSEXVOL", + "GLIDE_OUTPUT_USEHTOR", + "GLIDE_REFLIG_FORMAT", + "GLIDE_REXVOL", + "GLIDE_REXVOLIN", + "GLIDE_TORCONS_ALLBONDS", + "GLIDE_TORCONS_IATOMS", + "GLIDE_TORCONS_JATOMS", + "GLIDE_TORCONS_KATOMS", + "GLIDE_TORCONS_LATOMS", + "GLIDE_TORCONS_PATTERN_INDEX", + "GLIDE_TORCONS_PATTERNS", + "GLIDE_TORCONS_SETVAL", + "GLIDE_TORCONS_VALUES", + "GLIDE_TORCONSFILE", + "GLIDE_XEXVOL", + "GLIDE_XP_NMAXCORE", + "GLIDE_XP_RMSCUT", + "GLIDE_YEXVOL", + "GLIDE_ZEXVOL", + "GLIDECONS", + "GLIDECONSFEATATOMS", + "GLIDECONSFEATHASINCLUDE", + "GLIDECONSFEATINCLUDE", + "GLIDECONSFEATINDEX", + "GLIDECONSFEATPATTERNS", + "GLIDECONSGROUPNREQUIRED", + "GLIDECONSNAMES", + "GLIDECONSUSEMET", + "GLIDESCORUSEMET", + "GLIDEUSEALLEXVOL", + "GLIDEUSECONSFEAT", + "GLIDEUSECONSFEATINDEX", + "GLIDEUSECONSGROUPINDEX", + "GLIDEUSECONSLABELS", + "GLIDEUSEXVOL", + "GLIDEUSEXVOLNAMES", + "GLIDEXVOLNAMES", + "GRIDFILE", + "GSCORE", + "GSCORE_CUTOFF", + "HAVEGLIDECONSFEAT", + "HBOND_ACCEP_HALO", + "HBOND_CUTOFF", + "HBOND_DONOR_AROMH", + "HBOND_DONOR_AROMH_CHARGE", + "HBOND_DONOR_HALO", + "INCLUDE_INPUT_CONF", + "INCLUDE_INPUT_RINGS", + "JOBNAME", + "KEEP_SUBJOB_POSES", + "KEEPRAW", + "KEEPSKIPPED", + "LIG_CCUT", + "LIG_MAECHARGES", + "LIG_VSCALE", + "LIGAND_END", + "LIGAND_START", + "LIGANDFILE", + "LIGANDFILES", + "LIGFORMAT", + "LIGPREP", + "LIGPREP_ARGS", + "MACROCYCLE", + "MACROCYCLE_OPTIONS", + "MAX_ITERATIONS", + "MAXATOMS", + "MAXKEEP", + "MAXREF", + "MAXROTBONDS", + "METAL_CUTOFF", + "NENHANCED_SAMPLING", + "NMAXRMSSYM", + "NOSORT", + "NREPORT", + "NREQUIRED_CONS", + "OUTPUTDIR", + "PAIRDISTANCES", + "PEPTIDE", + "PHASE_DB", + "PHASE_NCONFS", + "PHASE_SUBSET", + "POSE_DISPLACEMENT", + "POSE_HTORSION", + "POSE_OUTTYPE", + "POSE_RMSD", + "POSES_PER_LIG", + "POSTDOCK", + "POSTDOCK_ITMAX", + "POSTDOCK_NPOSE", + "POSTDOCK_SCITMAX", + "POSTDOCK_XP_DELE", + "POSTDOCKCG", + "POSTDOCKLIGMIN", + "POSTDOCKSTRAIN", + "PRECISION", + "PREMIN", + "PREMINCG", + "PREMINELEC", + "PREMINITMAX", + "RADIUS_RES_INTERACTION", + "REF_LIGAND_FILE", + "REFINDEX", + "REPORT_CPU_TIME", + "REWARD_INTRA_HBONDS", + "RINGCONFCUT", + "RINGONFLY", + "SAMPLE_N_INVERSIONS", + "SAMPLE_RINGS", + "SCORE_INPUT_POSE", + "SCORE_MINIMIZED_INPUT_POSE", + "SCORING_CUTOFF", + "SHAPE_ATOMS", 
+ "SHAPE_RESTRAIN", + "SHAPE_TYPING", + "SKIP_EPIK_METAL_ONLY", + "STRAIN_GSFACTOR", + "STRAIN_GSTHRESH", + "STRAINELEC", + "SUBSTRATE_PENAL_FILE", + "USE_CONS", + "USE_REF_LIGAND", + "USECOMPMAE", + "WRITE_CSV", + "WRITE_RES_INTERACTION", + "WRITE_TIMINGS_CSV", + "WRITE_XP_DESC", + "WRITEREPT", + } + + result = {} + current_block = None + for linenum, line in enumerate(lines): + if any(line.startswith(starter) for starter in block_starters): + # Block start. + current_block = line.strip() + result[current_block] = {} + elif line.strip() == "": + # Empty line: close current block if any is open, and skip the line. + current_block = None + elif line.startswith(indent4): + # Indented line inside the block. + if current_block is None: + raise ValueError( + f"Unexpected indent outside of block for line {linenum}: {line}" + ) + kw, value = line.strip().split(sep=separator3, maxsplit=1) + result[current_block][kw] = value.strip('"') + elif any(line.startswith(kw) for kw in allowed_keywords): + # Ordinary keywords. + kw, value = line.strip().split(sep=separator3, maxsplit=1) + result[kw] = value + else: + raise ValueError( + f"Unexpected line {linenum} in maestro input file: {line}" + ) + + return result diff --git a/icolos/core/workflow_steps/schrodinger/desmond_exec.py b/icolos/core/workflow_steps/schrodinger/desmond_exec.py new file mode 100644 index 0000000..76f5858 --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/desmond_exec.py @@ -0,0 +1,114 @@ +import os +from icolos.core.step_utils.structconvert import StructConvert +from pydantic import BaseModel +from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase +from icolos.utils.execute_external.schrodinger import SchrodingerExecutor +from icolos.utils.enums.step_enums import StepDesmondEnum +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum + +_SDE = StepDesmondEnum() +_SEE = SchrodingerExecutablesEnum() + + +class StepDesmondExec(StepSchrodingerBase, BaseModel): + """ + Executes a full Desmond multisim workflow + """ + + class Config: + underscore_attrs_are_private = True + arbitrary_types_allowed = True + + _struct_converter: StructConvert = None + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(SchrodingerExecutor) + self._check_backend_availability() + self._struct_converter = StructConvert( + binary_location=self.execution.binary_location, + prefix_execution=self.execution.prefix_execution, + ) + + def execute(self): + # takes in the cms file from the preprocessor and runs the full multisim workflow on it + tmp_dir = self._make_tmpdir() + structure = self.data.generic.get_argument_by_extension( + "pdb", rtn_file_object=True + ) + pdb_id = structure.get_file_name() + structure.write(tmp_dir) + # convert the pdb file to mae + self._struct_converter.pdb2mae( + os.path.join(tmp_dir, pdb_id), + os.path.join(tmp_dir, "desmond_md_job_1.mae"), + ) + + preprocess_defaults = { + "-HOST": "localhost", + "-JOBNAME": "desmond_md_job_1", + "-m": "config.msj desmond_md_job_1.mae", + "-o": "setup.cms", + } + arguments = self._parse_arguments(preprocess_defaults) + # compile and write the msj to the tmpdir + config_dict = ( + self.settings.additional[_SDE.SETUP_MSJ_FIELDS] + if _SDE.SETUP_MSJ_FIELDS in self.settings.additional.keys() + else {} + ) + + self._write_config(tmp_dir, dict_=config_dict, file_name=_SDE.PREPROCESS_MSJ) + + # execute + self._backend_executor.execute( + command=_SEE.MULTISIM_EXEC, + arguments=arguments, + check=True, + location=tmp_dir, + 
) + + exec_defaults = { + "-HOST": "localhost", + "-JOBNAME": "desmond_production", + "-maxjob": "1", + "-cpu": "1", + "-m": _SDE.PRODUCTION_MSJ, + "-c": _SDE.PRODUCTION_CFG, + "-description": '"Molecular Dynamics" setup.cms', + "-mode": "umbrella", + "-PROJ": tmp_dir, + "-o": "out.cms", + "-lic": _SDE.TOKEN_STR, + } + + msj_config_dict = ( + self.settings.additional[_SDE.MSJ_FIELDS] + if _SDE.MSJ_FIELDS in self.settings.additional.keys() + else {} + ) + + cfg_config_dict = ( + self.settings.additional[_SDE.CFG_FIELDS] + if _SDE.CFG_FIELDS in self.settings.additional.keys() + else {} + ) + + # write the config files: msj for the full workflow, and a cfg for the production sim + self._write_config(tmp_dir, msj_config_dict, _SDE.PRODUCTION_MSJ) + + self._write_config(tmp_dir, cfg_config_dict, _SDE.PRODUCTION_CFG) + + arguments = self._parse_arguments(exec_defaults) + + self._backend_executor.execute( + command=_SEE.MULTISIM_EXEC, + arguments=arguments, + check=True, + location=tmp_dir, + ) + + self._parse_output(tmp_dir) + + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/schrodinger/desmond_preprocessor.py b/icolos/core/workflow_steps/schrodinger/desmond_preprocessor.py new file mode 100644 index 0000000..eba9eed --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/desmond_preprocessor.py @@ -0,0 +1,75 @@ +import os +from icolos.core.step_utils.structconvert import StructConvert +from pydantic import BaseModel +from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase +from icolos.utils.execute_external.schrodinger import SchrodingerExecutor +from icolos.utils.enums.step_enums import StepDesmondEnum +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum + +_SDE = StepDesmondEnum() +_SEE = SchrodingerExecutablesEnum() + + +class StepDesmondSetup(StepSchrodingerBase, BaseModel): + """ + Run preprocessing step to generate system for Desmond simulation + """ + + _struct_converter: StructConvert = None + + class Config: + underscore_attrs_are_private = True + arbitrary_types_allowed = True + + def __init__(self, **data): + super().__init__(**data) + self._initialize_backend(SchrodingerExecutor) + self._check_backend_availability() + self._struct_converter = StructConvert( + binary_location=self.execution.binary_location, + prefix_execution=self.execution.prefix_execution, + ) + + def execute(self): + tmp_dir = self._make_tmpdir() + + # need to take a structure file, possibly preprocess if pdb + # get the structure file and extract the file name + structure = self.data.generic.get_argument_by_extension( + "pdb", rtn_file_object=True + ) + pdb_id = structure.get_file_name() + structure.write(tmp_dir) + # convert the pdb file to mae + + self._struct_converter.pdb2mae( + os.path.join(tmp_dir, pdb_id), + os.path.join(tmp_dir, "desmond_md_job_1.mae"), + ) + + defaults = { + "-HOST": "localhost", + "-JOBNAME": "desmond_md_job_1", + "-m": "config.msj desmond_md_job_1.mae", + "-o": "setup.cms", + } + arguments = self._parse_arguments(defaults) + # compile and write the msj to the tmpdir + config_dict = ( + self.settings.additional[_SDE.MSJ_FIELDS] + if _SDE.MSJ_FIELDS in self.settings.additional.keys() + else {} + ) + + self._write_config(tmp_dir, dict_=config_dict, file_name=_SDE.PREPROCESS_MSJ) + + # execute + self._backend_executor.execute( + command=_SEE.MULTISIM_EXEC, + arguments=arguments, + check=True, + location=tmp_dir, + ) + self._parse_output(tmp_dir) + + self._remove_temporary(tmp_dir) diff --git 
a/icolos/core/workflow_steps/schrodinger/fep_analysis.py b/icolos/core/workflow_steps/schrodinger/fep_analysis.py new file mode 100644 index 0000000..5d93f83 --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/fep_analysis.py @@ -0,0 +1,27 @@ +from icolos.core.workflow_steps.schrodinger.fep_base import StepFEPBase +from icolos.utils.enums.step_enums import StepBaseEnum, StepFepPlusEnum +from icolos.utils.enums.program_parameters import FepPlusEnum + +from pydantic import BaseModel + +_FE = FepPlusEnum() +_SFE = StepFepPlusEnum() +_SBE = StepBaseEnum + + +class StepFepPlusAnalysis(StepFEPBase, BaseModel): + """ + Standalone class to analyse data from a previous fep job + """ + + def __init__(self, **data): + super().__init__(**data) + + def execute(self): + """ + Analyses the map produced from an FEP run + """ + tmp_dir = self._make_tmpdir() + self.data.generic.write_out_all_files(tmp_dir) + self._extract_log_file_data(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/schrodinger/fep_base.py b/icolos/core/workflow_steps/schrodinger/fep_base.py new file mode 100644 index 0000000..426fa70 --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/fep_base.py @@ -0,0 +1,211 @@ +from pydantic import BaseModel +from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase +import numpy as np +from scipy.sparse import csr_matrix +from scipy.sparse.csgraph import shortest_path +from icolos.utils.enums.step_enums import StepFepPlusEnum +from typing import List +import time +import os +from icolos.core.workflow_steps.step import _LE + +_SFE = StepFepPlusEnum() + + +class StepFEPBase(StepSchrodingerBase, BaseModel): + """ + Base class containing common functionality for Schrodinger FEP+ workflows + """ + + def __init__(self, **data): + super().__init__(**data) + + def _parse_output(self, tmp_dir): + # pick up the final annotated map construction + self.data.generic.clear_file_dict() + self._logger.log(f"Reading output map.", _LE.INFO) + data = None + counts = 0 + # hold whilst the job data gets written to local fs + while data is None and counts < 50000: + try: + path = [ + file + for file in os.listdir(tmp_dir) + if file.endswith(_SFE.FMP_OUTPUT_FILE) + ] + assert len(path) == 1 + path = path[0] + with open(os.path.join(tmp_dir, path), "rb") as f: + data = f.read() + except AssertionError: + self._logger.log( + "Output file has not yet appeared in the file system, sleeping and retrying...", + _LE.INFO, + ) + time.sleep(15) + counts += 1 + + self._add_data_to_generic(path, data) + + def _extract_log_file_data(self, tmp_dir): + """ + Parses FEP log file to extract edge and node properties + """ + lines = None + counts = 0 + # wait whilst job sits in the queue + while lines is None and counts < 50000: + try: + log_file = [ + file for file in os.listdir(tmp_dir) if file.endswith(_SFE.LOGFILE) + ] + assert len(log_file) == 1 + log_file = log_file[0] + + with open(os.path.join(tmp_dir, log_file), "r") as f: + lines = f.readlines() + + edge_header_index = [ + idx for idx, s in enumerate(lines) if _SFE.EDGE_HEADER_LINE in s + ][-1] + node_header_index = [ + idx for idx, s in enumerate(lines) if _SFE.NODE_HEADER_LINE in s + ][-1] + end_of_data_index = [ + idx for idx, s in enumerate(lines) if _SFE.DATA_TERMINUS in s + ][0] + + edge_data_lines = [ + line + for line in lines[edge_header_index + 3 : node_header_index - 1] + ] + node_data_lines = [ + line + for line in lines[node_header_index + 3 : end_of_data_index - 1] + ] + + 
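                # the edge and node tables sliced out above are handed on below:
+                # _process_edge_lines calibrates per-compound dG values from the
+                # edge ddGs, while _process_node_lines attaches the per-node dG
+                # tags reported directly in the log file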
+                self._process_edge_lines(edge_data_lines)
+                self._process_node_lines(node_data_lines)
+
+            except AssertionError:
+                self._logger.log(
+                    "Log file has not yet appeared in the file system, sleeping and retrying...",
+                    _LE.INFO,
+                )
+                time.sleep(15)
+                counts += 1
+
+    def _process_node_lines(self, data: List[str]) -> None:
+        for entry in data:
+            fields = entry.split()
+            idx = fields[1]
+            dG = fields[2]
+            # attach dG tags to the compound objects, if present
+            if self.data.compounds:
+                # account for running this step compoundless
+                self.data.compounds[int(idx[0])].get_enumerations()[0].get_conformers()[
+                    0
+                ].get_molecule().SetProp("dG", str(dG))
+            self._logger.log(
+                f"dG directly from the output file for compound {idx} is {dG}",
+                _LE.INFO,
+            )
+
+    def _process_edge_lines(self, edge_data: List[str]) -> None:
+        """
+        Calibrate dG values using a reference compound and the edge ddG values from the log file output, returning a dG value for each compound
+        """
+
+        # calculate the max ligand index; ligands may have been skipped in previous steps, so we cannot rely on self.get_compounds()
+        len_nodes = 0
+        for line in edge_data:
+            parts = line.split()
+
+            lig_from = int(parts[1].split(":")[0])
+            lig_to = int(parts[3].split(":")[0])
+            for idx in [lig_from, lig_to]:
+                if idx > len_nodes:
+                    len_nodes = idx
+        len_nodes += 1  # account for zero-indexed ligands
+
+        error_matrix = np.zeros((len_nodes, len_nodes))
+        ddG_matrix = np.zeros((len_nodes, len_nodes))
+        for line in edge_data:
+            parts = line.split()
+            try:
+                # parse the compound info from the log file
+                lig_from = int(parts[1].split(":")[0])
+                lig_to = int(parts[3].split(":")[0])
+                ddG = float(parts[4].split("+-")[0])
+                err = float(parts[4].split("+-")[1])
+            except ValueError:
+                self._logger.log(
+                    f"Line: {line} from the logfile contained an unexpected datatype - cannot process this edge - skipping",
+                    _LE.WARNING,
+                )
+                continue
+
+            error_matrix[lig_from, lig_to] = err
+            error_matrix[lig_to, lig_from] = err
+            ddG_matrix[lig_from, lig_to] = ddG
+            ddG_matrix[lig_to, lig_from] = -ddG
+        error_matrix = csr_matrix(error_matrix)
+        # compute the lowest-error paths from every ligand back to the anchor (the reference ligand, node 0)
+        _, predecessors = shortest_path(
+            error_matrix, directed=False, return_predecessors=True, indices=0
+        )
+        self._construct_dg_per_compound(ddG_matrix, predecessors, error_matrix)
+
+    def _construct_dg_per_compound(
+        self, ddG: np.ndarray, predecessors: List, error_matrix: np.ndarray
+    ) -> None:
+        """
+        Calculate the calibrated binding free energy per compound using a reference value
+        and attach the calculated dG to the compounds
+        """
+        try:
+            ref_dG = self.settings.additional[_SFE.REFERENCE_DG]
+        except KeyError:
+            self._logger.log(
+                "Expected to find a reference dG value for the lead compound, but none was found."
+ "Defaulting to 0.00, you will need to apply a manual correction afterwards", + _LE.WARNING, + ) + ref_dG = 0.00 + + def _calculate_dg(comp_num: int, dG=ref_dG, err=0): + prev_index = predecessors[comp_num] + dG += ddG[prev_index, comp_num] + err += error_matrix[prev_index, comp_num] + if prev_index != 0: + _calculate_dg(prev_index, dG=dG, err=err) + else: + data = str(round(dG, 2)) + "+-" + str(round(err, 2)) + self.data.compounds[idx].get_enumerations()[0].get_conformers()[ + 0 + ].get_molecule().SetProp("map_dG", data) + self._logger.log( + f"Calculated dG from spanning tree for compound {idx} is {data}", + _LE.INFO, + ) + + for comp in self.get_compounds(): + idx = comp.get_compound_number() + # check whether the compound appeared in the final map + try: + + if idx == 0: + comp.get_enumerations()[0].get_conformers()[ + 0 + ].get_molecule().SetProp( + "map_dG", str(self.settings.additional[_SFE.REFERENCE_DG]) + ) + if idx != 0: # skip the reference compound + _calculate_dg(idx) + except IndexError: + self._logger.log( + f"Compound {idx} was not found in the output map, it was likely dropped during the workflow", + _LE.WARNING, + ) + continue diff --git a/icolos/core/workflow_steps/schrodinger/fep_plus_execution.py b/icolos/core/workflow_steps/schrodinger/fep_plus_execution.py new file mode 100644 index 0000000..5e4af72 --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/fep_plus_execution.py @@ -0,0 +1,192 @@ +from copy import deepcopy +from typing import List + +from icolos.utils.enums.step_enums import StepBaseEnum, StepFepPlusEnum +from icolos.utils.enums.program_parameters import FepPlusEnum +from icolos.utils.execute_external.fep_plus import FepPlusExecutor + +from pydantic import BaseModel, PrivateAttr +import os +from icolos.core.workflow_steps.step import _LE +import time +from icolos.core.workflow_steps.schrodinger.fep_base import StepFEPBase + +from icolos.utils.general.icolos_exceptions import StepFailed + +_FE = FepPlusEnum() +_SFE = StepFepPlusEnum() +_SBE = StepBaseEnum + + +class StepFepPlusExec(StepFEPBase, BaseModel): + """ + Execute the FEP+ workflow, interfaced with AWS + """ + + class Config: + underscore_attrs_are_private = True + + _job_id = PrivateAttr() + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=FepPlusExecutor) + self._check_backend_availability() + + self._job_id = None + + def _parse_arguments(self): + parameters = deepcopy(self.settings.arguments.parameters) + arguments = [] + if len(self.settings.arguments.flags) > 0: + for flag in self.settings.arguments.flags: + arguments.append(str(flag)) + if parameters: + for key in parameters.keys(): + arguments.append(key) + if parameters[key] is not None and parameters[key] != "": + arguments.append(str(parameters[key])) + # for our AWS config, need to set processors per job =1 + if "-ppj" not in arguments: + arguments.extend(["-ppj", "1"]) + self._logger.log( + "Set -ppj 1 for AWS execution, since no override was specified", + _LE.DEBUG, + ) + if _SFE.RETRIES not in arguments: + arguments.extend([_SFE.RETRIES, "3"]) + arguments.append(_SFE.FMP_OUTPUT_FILE) + + # remove "-WAIT" if it has been set, as this will interfere with the implementation (and might cause issues + # due to file system write buffering) + if _SFE.WAIT_FLAG in arguments: + self._logger.log( + "Ignoring -WAIT flag for FEP+ execution (this would interfere with the implementation).", + _LE.WARNING, + ) + arguments = [arg for arg in arguments if arg != _SFE.WAIT_FLAG] + return arguments + + 
+    def _unit_test_simulate_output(self, fmp_data, log_data):
+        # call this method from the unit tests instead of the execute method to write out the expected output
+        tmp_dir = self._make_tmpdir()
+        with open(
+            os.path.join(
+                tmp_dir,
+                f"{self.settings.arguments.parameters[_SFE.JOBNAME_FLAG]}_{_SFE.FMP_OUTPUT_FILE}",
+            ),
+            "w",
+        ) as f:
+            f.write(fmp_data)
+        with open(
+            os.path.join(
+                tmp_dir,
+                f"{self.settings.arguments.parameters[_SFE.JOBNAME_FLAG]}_{_SFE.LOGFILE}",
+            ),
+            "w",
+        ) as f:
+            f.write(log_data)
+        self._parse_output(tmp_dir)
+        self._extract_log_file_data(tmp_dir)
+        self._remove_temporary(tmp_dir)
+
+    def _get_job_id(self, result):
+        parts = str(result.stdout).split("\n")
+        for part in parts:
+            if _SFE.JOBID_STRING in part:
+                # full_job_id looks something like 549a938d-d2ca-11eb-b9f2-0a6713e9bd3a, but only the first part of the
+                # hash is needed to access the right job afterwards
+                full_job_id = part.split(" ")[1]
+                self._job_id = full_job_id.split("-")[0]
+                self._logger.log(f"JobId of FEP+ run is {self._job_id}.", _LE.DEBUG)
+        if self._job_id is None:
+            self._logger.log(
+                "Could not obtain JobId after execution - abort.", _LE.ERROR
+            )
+            raise StepFailed
+
+    def _get_log_file(self) -> List[str]:
+        arguments = [
+            self._job_id,
+            _SFE.FILE_NAME,
+            f'{self.settings.arguments.parameters[_SFE.JOBNAME_FLAG]}_{_SFE.LOGFILE}"',
+        ]
+        logging_result = None
+        trials = 0
+        while trials < 30000:
+            logging_result = self._backend_executor.execute(
+                command=_FE.JSC_TAIL_FILE, arguments=arguments, check=False
+            )
+            if logging_result.returncode == 1:
+                time.sleep(30)
+                trials += 1
+                continue
+            elif logging_result.returncode == 0:
+                break
+        if logging_result is None:
+            raise StepFailed("Could not obtain log file from server within time limit.")
+        log_lines = str(logging_result.stdout).split("\n")
+        return log_lines
+
+    def _get_new_lines(self, old_file) -> List[str]:
+        new_lines = self._get_log_file()
+        # drop the first n lines of the new log file, where n is the length of the old log file
+        diff = new_lines[len(old_file) - 1 :]
+        return diff
+
+    def _wait_for_job_completion(self):
+        # get the log file at this stage
+        log_file = self._get_log_file()
+        for line in log_file:
+            self._logger_blank.log(line, _LE.INFO)
+        # TODO: set maximum (or at least allow to set a maximum)
+        while (_SFE.FEP_EXEC_COMPLETE not in log_file) and (
+            _SFE.FEP_EXEC_PARTIAL_COMPLETE not in log_file
+        ):
+            time.sleep(30)
+            new_lines = self._get_new_lines(log_file)
+            if len(new_lines) > 0:
+                for line in new_lines:
+                    self._logger_blank.log(line, _LE.INFO)
+                    log_file.append(line)
+
+    def _clean_up(self, tmp_dir: str):
+        self._remove_temporary(tmp_dir)
+        self._job_id = None
+
+    def execute(self):
+        # generate the temporary directory and populate it with the required files
+        tmp_dir = self._make_tmpdir()
+        self.data.generic.write_out_all_files(tmp_dir)
+
+        # check that the compounds were loaded properly
+        if not self.data.compounds:
+            self._logger.log(
+                f"No compounds were loaded for step {self.step_id}! "
+                f"If this was intentional you can ignore this warning.",
+                _LE.WARNING,
+            )
+
+        # obtain the arguments as a list of strings
+        arguments = self._parse_arguments()
+        self._logger.log(f"Executing FEP+ calculation in {tmp_dir}.", _LE.INFO)
+
+        # execute fep_plus
+        self._apply_token_guard()
+        result = self._backend_executor.execute(
+            command=_FE.FEP_EXECUTOR, arguments=arguments, location=tmp_dir, check=True
+        )
+
+        # get the job ID from the job server
+        self._get_job_id(result)
+
+        # wait for job completion
+        self._wait_for_job_completion()
+
+        # extract the edge information from the log file (rather than from the annotated map, as this is easier)
+        self._parse_output(tmp_dir)
+        self._extract_log_file_data(tmp_dir)
+        self._logger.log("Completed FEP+ execution.", _LE.INFO)
+
+        # clean-up and reset
+        self._clean_up(tmp_dir)
diff --git a/icolos/core/workflow_steps/schrodinger/fep_plus_setup.py b/icolos/core/workflow_steps/schrodinger/fep_plus_setup.py
new file mode 100644
index 0000000..4e5899e
--- /dev/null
+++ b/icolos/core/workflow_steps/schrodinger/fep_plus_setup.py
@@ -0,0 +1,303 @@
+from typing import List
+from icolos.core.containers.generic import GenericData
+from icolos.core.step_utils.structconvert import StructConvert
+from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase
+from icolos.utils.enums.program_parameters import (
+    FepPlusEnum,
+    SchrodingerExecutablesEnum,
+)
+from icolos.utils.enums.step_enums import StepBaseEnum, StepFepPlusEnum, StepGlideEnum
+from icolos.utils.execute_external.fep_plus import FepPlusExecutor
+from rdkit.Chem import SDMolSupplier
+from icolos.utils.execute_external.schrodinger import SchrodingerExecutor
+from icolos.core.workflow_steps.step import _LE
+import os
+from pydantic import BaseModel
+from rdkit.Chem import SDWriter
+
+_SFE = StepFepPlusEnum()
+_FE = FepPlusEnum()
+_SEE = SchrodingerExecutablesEnum()
+_SBE = StepBaseEnum
+_SGE = StepGlideEnum()
+
+
+class StepFepPlusSetup(StepSchrodingerBase, BaseModel):
+    """
+    Construct and analyse a perturbation map for a set of congeneric ligands.
+    Supports extracting structures from poseviewer or PDB files.
+    """
+
+    _schrodinger_executor: SchrodingerExecutor = None
+    _converter: StructConvert = None
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        self._initialize_backend(executor=FepPlusExecutor)
+        self._check_backend_availability()
+
+        self._schrodinger_executor = SchrodingerExecutor(
+            prefix_execution=self.execution.prefix_execution,
+            binary_location=self.execution.binary_location,
+        )
+        self._converter = StructConvert(
+            prefix_execution=self.execution.prefix_execution,
+            binary_location=self.execution.binary_location,
+        )
+
+    def _extract_receptor_from_pv(self, tmp_dir, input_file: str = _SFE.RECEPTOR_MAEGZ):
+        # run split_structure.py to obtain the receptor structure
+        self._logger.log("Extracting receptor from structure.", _LE.INFO)
+        self._schrodinger_executor.execute(
+            command=_SEE.STRUCT_SPLIT,
+            arguments=[
+                "-m",
+                "pdb",
+                "-many_files",
+                os.path.join(tmp_dir, input_file),
+                f"{_SFE.STRUCT_SPLIT_BASE}.mae",
+            ],
+            check=True,
+            location=tmp_dir,
+        )
+
+        # get rid of the original receptor structure now that we have the new one
+        os.remove(os.path.join(tmp_dir, _SFE.RECEPTOR_MAEGZ))
+
+    def _write_receptor_from_pv(self, tmp_dir):
+        # handles writing the receptor structure to the tmpdir, either from a poseviewer file or a provided receptor:
+        # take the first poseviewer file that can be found, split the structure and keep only the receptor
+        for compound in self.get_compounds():
+            for enumeration in compound.get_enumerations():
+                for conformer in enumeration.get_conformers():
+                    if (
+                        _SGE.GLIDE_POSEVIEWER_FILE_KEY
+                        in conformer.get_extra_data().keys()
+                    ):
+                        with open(
+                            os.path.join(tmp_dir, _SFE.RECEPTOR_MAEGZ), "wb"
+                        ) as f:
+                            f.write(
+                                conformer.get_extra_data()[
+                                    _SGE.GLIDE_POSEVIEWER_FILE_KEY
+                                ]
+                            )
+                        break
+        if _SFE.RECEPTOR_MAEGZ in os.listdir(tmp_dir):
+            self._logger.log(
+                "Writing poseviewer file to temporary directory.", _LE.INFO
+            )
+            self._extract_receptor_from_pv(tmp_dir)
+        elif self.data.generic.get_files_by_extension("pdb"):
+            # a pdb file was loaded to generic data, use this as the receptor structure
+            self.data.generic.get_argument_by_extension(
+                "pdb", rtn_file_object=True
+            ).write(os.path.join(tmp_dir, "receptor.pdb"), join=False)
+
+            self._logger.log(
+                "Converting provided pdb receptor structure to mae", _LE.DEBUG
+            )
+            self._converter.convert(
+                os.path.join(tmp_dir, "receptor.pdb"),
+                os.path.join(tmp_dir, f"{_SFE.STRUCT_SPLIT_BASE}_receptor1.mae"),
+            )
+            os.remove(os.path.join(tmp_dir, "receptor.pdb"))
+
+        else:
+            self._logger.log(
+                "No poseviewer file was found attached to any of the conformers, and no PDB receptor file was specified - this must be set in the docking step",
+                _LE.ERROR,
+            )
+            raise FileNotFoundError
+
+    def _check_xray_structure(self, compound_number):
+        # check whether an xray structure has been provided for that compound
+        if _SFE.XRAY_STRUCTURES in self.settings.additional.keys():
+            if isinstance(self.settings.additional[_SFE.XRAY_STRUCTURES], dict):
+                if (
+                    compound_number
+                    in self.settings.additional[_SFE.XRAY_STRUCTURES].keys()
+                ):
+                    return True, _FE.DICT
+            elif os.path.isdir(self.settings.additional[_SFE.XRAY_STRUCTURES]):
+                if os.path.isfile(
+                    os.path.join(
+                        self.settings.additional[_SFE.XRAY_STRUCTURES],
+                        f"{compound_number}.pdb",
+                    )
+                ):
+                    return True, _FE.PATH
+        return False, None
+
+    def _rename_sdf(self, path, comp_num):
+        with open(path, "r") as f:
+            lines = f.readlines()[1:]
+        new_lines = [f"{comp_num}:0:0\n"]
+        for line in lines:
+            new_lines.append(line)
+        self._remove_temporary(path)
+        with open(path, "w") as f:
+            f.writelines(new_lines)
+
+    def _extract_ligand_from_pdb(self, tmp_dir: str, comp_num: int, type: str):
+        # if ligand poses have been provided from xray structures, extract just the ligand
+        self._logger.log(
+            f"Extracting ligand from provided Xray structure for compound {comp_num}",
+            _LE.DEBUG,
+        )
+        if type == _FE.DICT:
+            file_path = self.settings.additional[_SFE.XRAY_STRUCTURES][comp_num]
+        else:
+            file_path = os.path.join(
+                self.settings.additional[_SFE.XRAY_STRUCTURES], f"{comp_num}.pdb"
+            )
+        if not os.path.isfile(file_path):
+            raise FileNotFoundError(
+                "The provided path to the xray structure does not exist or is not accessible"
+            )
+        self._schrodinger_executor.execute(
+            command=_SEE.STRUCT_SPLIT,
+            arguments=["-m", "pdb", "-many_files", file_path, f"{_SFE.XRAY_SPLIT}.sdf"],
+            check=True,
+            location=tmp_dir,
+        )
+        # remove everything apart from the ligand sdf, which is concatenated later
+        lig_found = False
+        for file in os.listdir(tmp_dir):
+            idx = file.split("/")[-1]
+            if idx.startswith(_SFE.XRAY_SPLIT):
+                if "ligand" in idx:
+                    # need to modify the name from the standard that Schrodinger provides
+                    self._rename_sdf(os.path.join(tmp_dir, file), comp_num)
+                    mols = SDMolSupplier(os.path.join(tmp_dir, file))
+                    data = mols[0]
+                    lig_found = True
+                    self._remove_temporary(os.path.join(tmp_dir, file))
+                else:
+                    self._remove_temporary(os.path.join(tmp_dir, file))
+        if lig_found:
+            return data
+
+    def _write_input_files(self, tmp_dir):
+        # write the receptor structure to the tmpdir, either from a poseviewer or a provided pdb file
+        self._write_receptor_from_pv(tmp_dir)
+
+        # write out all conformers present in self.data.compounds to a single sdf file
+        writer = SDWriter(os.path.join(tmp_dir, "concatenated.sdf"))
+        for compound in self.get_compounds():
+            # if an xray pose is provided, use this
+            flag, type = self._check_xray_structure(compound.get_compound_number())
+            if flag is True:
+                self._logger.log(
+                    "Found Xray structure for the ligand - using this in preference to a docking pose",
+                    _LE.DEBUG,
+                )
+                mol = self._extract_ligand_from_pdb(
+                    tmp_dir, compound.get_compound_number(), type
+                )
+                writer.write(mol)
+            else:
+                # use the docked conformer
+                for enumeration in compound.get_enumerations():
+                    for conformer in enumeration.get_conformers():
+                        mol = conformer.get_molecule()
+                        writer.write(mol)
+        # close the writer to make sure all molecules are flushed to disk
+        writer.close()
+
+    def _parse_arguments(self, io_dict: dict) -> List[str]:
+        arguments = []
+        for key in self.settings.arguments.parameters.keys():
+            arguments.append(key)
+            arguments.append(str(self.settings.arguments.parameters[key]))
+        for flag in self.settings.arguments.flags:
+            arguments.append(str(flag))
+        for key, value in io_dict.items():
+            arguments.append(key)
+            arguments.append(value)
+        return arguments
+
+    def _get_structcat_args(
+        self, tmp_dir: str, out_file_type: str, outfile: str
+    ) -> List[str]:
+        arguments = [
+            f"{_SEE.STRUCTCAT_I}mae",
+            os.path.join(tmp_dir, f"{_SFE.STRUCT_SPLIT_BASE}_receptor1.mae"),
+            f"{_SEE.STRUCTCAT_I}sd",
+        ]
+
+        for file in os.listdir(tmp_dir):
+            if file.endswith("sdf"):
+                arguments.append(os.path.join(tmp_dir, file))
+        arguments.append(f"{_SEE.STRUCTCAT_O}{out_file_type}")
+        arguments.append(os.path.join(tmp_dir, outfile))
+        return arguments
+
+    def _concatenate_pv_files(self, tmp_dir: str):
+        # create a poseviewer-formatted file with the receptor structure, then the docked ligand poses
+        arguments = self._get_structcat_args(
+            tmp_dir=tmp_dir, out_file_type="mae", outfile=_SFE.STRUCTCAT_MAEGZ_OUTFILE
+        )
+        self._schrodinger_executor.execute(
+            command=_SEE.STRUCTCAT, arguments=arguments, check=True
+        )
+
+    def _analyse_map(self, tmp_dir):
+        """Run the fmp_stats program to analyse the map, i.e. generate node similarities etc."""
+        result = self._schrodinger_executor.execute(
+            command=_SEE.FMP_STATS,
+            arguments=["out.fmp", "-f"],
+            check=True,
+            location=tmp_dir,
+        )
+        log_lines = []
+        for line in str(result.stdout).split("\n"):
+            self._logger_blank.log(line, _LE.INFO)
+            log_lines.append(line + "\n")
+
+        self.data.generic.add_file(
+            GenericData(file_name="fep_mapper.log", file_data=log_lines)
+        )
+
+    def _parse_output(self, tmp_dir: str):
+        # retrieve the edge and fmp files produced by the mapper step and attach them to the generic dict
+        files = [
+            os.path.join(tmp_dir, f)
+            for f in os.listdir(tmp_dir)
+            if f.endswith(("fmp", "edge", "log"))
+        ]
+
+        for file in files:
+            try:
+                with open(file, "r") as f:
+                    data = f.read()
+            except UnicodeDecodeError:
+                with open(file, "rb") as f:
+                    data = f.read()
+            self._add_data_to_generic(file, data)
+
+    def execute(self):
+        # run the job in a temporary directory
+        tmp_dir = self._make_tmpdir()
+
+        self._write_input_files(tmp_dir)
+        self._concatenate_pv_files(tmp_dir)
+        io_dict = {
+            "": os.path.join(tmp_dir, _SFE.STRUCTCAT_MAEGZ_OUTFILE),
+            "-o": _SFE.FEP_MAPPER_OUTPUT,
+        }
+        arguments = self._parse_arguments(io_dict=io_dict)
+        self._apply_token_guard()  # wait for sufficient license tokens before launching, if a guard is configured
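+        # fep_mapper consumes the concatenated poseviewer file (the "" key in
+        # io_dict above) and writes the perturbation map (out.fmp), the edge
+        # list and a log, which _analyse_map and _parse_output pick up below
+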
self._logger.log("Optimising perturbation map", _LE.DEBUG) + self._backend_executor.execute( + command=_FE.FEP_MAPPER, arguments=arguments, check=True, location=tmp_dir + ) + assert os.path.isfile(os.path.join(tmp_dir, "out.fmp")) + self._logger.log( + f"Successfully executed fep_mapper in directory {tmp_dir}.", _LE.DEBUG + ) + + self._logger.log("Analysing the perturbation map.", _LE.DEBUG) + self._analyse_map(tmp_dir) + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/schrodinger/glide.py b/icolos/core/workflow_steps/schrodinger/glide.py new file mode 100644 index 0000000..af2cc1d --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/glide.py @@ -0,0 +1,635 @@ +import gzip +import os +import shutil +import tempfile +from copy import deepcopy +from typing import List, Tuple + +from pydantic import BaseModel +from rdkit import Chem + +from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase +from icolos.utils.execute_external.glide import GlideExecutor +from icolos.utils.execute_external.schrodinger import SchrodingerExecutor +from icolos.utils.general.files_paths import any_in_file, gen_tmp_file + +from icolos.core.containers.compound import Conformer + +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum, GlideEnum +from icolos.utils.enums.step_enums import StepGlideEnum, StepBaseEnum +from icolos.core.workflow_steps.step import _LE +from icolos.utils.general.parallelization import SubtaskContainer, Parallelizer, Subtask +from icolos.utils.general.strings import stringify + + +class GlideSupportEnum: + + GLIDE_INPUTBLOCK_COMMASEPARATED = [ + "CONSTRAINT_GROUP" + ] # define list of block keys which are to have commas + GLIDE_INPUTBLOCK_VALUEQUOTED = [ + "FEATURE" + ] # define list of block keys, where values are to be put + # into double quotation marks + + GLIDE_TG_WAIT_INTERVAL = "wait_interval_seconds" + GLIDE_TG_WAIT_LIMIT = "wait_limit_seconds" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +_SBE = StepBaseEnum +_EE = GlideEnum() +_SGE = StepGlideEnum() +_SEE = SchrodingerExecutablesEnum() +_GSE = GlideSupportEnum() + + +class StepGlide(StepSchrodingerBase, BaseModel): + + _schrodinger_executor: SchrodingerExecutor = None + + class Config: + underscore_attrs_are_private = True + + def __init__(self, **data): + super().__init__(**data) + + # initialize the executors and test availability + self._initialize_backend(executor=GlideExecutor) + self._check_backend_availability() + + self._schrodinger_executor = SchrodingerExecutor( + prefix_execution=self.execution.prefix_execution, + binary_location=self.execution.binary_location, + ) + + def _get_scores_from_conformer(self, conformer: Chem.Mol) -> Tuple[float, float]: + return ( + float(conformer.GetProp(_SGE.GLIDE_DOCKING_SCORE)), + float(conformer.GetProp(_SGE.GLIDE_GSCORE)), + ) + + def _set_docking_score(self, conformer: Chem.Mol) -> bool: + try: + docking_score, g_score = self._get_scores_from_conformer(conformer) + except KeyError: + return False + conformer.SetProp(_SBE.ANNOTATION_TAG_DOCKING_SCORE, str(docking_score)) + conformer.SetProp(_SBE.ANNOTATION_TAG_G_SCORE, str(g_score)) + return True + + def _generate_temporary_input_output_files( + self, batch: List[List[Subtask]] + ) -> Tuple[List[str], List[str], 
List[str], List[str]]: + tmp_output_dirs = [] + tmp_input_mae_paths = [] + tmp_output_sdf_paths = [] + tmp_output_maegz_paths = [] + + for next_subtask_list in batch: + # generate temporary input files and output directory + cur_tmp_output_dir = tempfile.mkdtemp() + _, cur_tmp_sdf = gen_tmp_file(suffix=".sdf", dir=cur_tmp_output_dir) + _, cur_tmp_mae = gen_tmp_file(suffix=".mae", dir=cur_tmp_output_dir) + + # write-out the temporary input file + writer = Chem.SDWriter(cur_tmp_sdf) + one_written = False + for subtask in next_subtask_list: + enumeration = subtask.data + mol = deepcopy(enumeration.get_molecule()) + if mol is not None: + mol.SetProp("_Name", enumeration.get_index_string()) + one_written = True + writer.write(mol) + writer.close() + if one_written is False: + self._remove_temporary(cur_tmp_output_dir) + continue + + # translate the SDF into a MAE file + self._translate_SDF_to_MAE( + sdf_path=cur_tmp_sdf, + mae_path=cur_tmp_mae, + executor=self._schrodinger_executor, + ) + + # add the path to which "_dock_subjob()" will write the result SDF + _, output_sdf_path = gen_tmp_file( + suffix="_result.sdf", dir=cur_tmp_output_dir + ) + _, output_maegz_path = gen_tmp_file( + suffix="_result.maegz", dir=cur_tmp_output_dir, text=False + ) + tmp_output_sdf_paths.append(output_sdf_path) + tmp_output_maegz_paths.append(output_maegz_path) + tmp_input_mae_paths.append(cur_tmp_mae) + tmp_output_dirs.append(cur_tmp_output_dir) + return ( + tmp_output_dirs, + tmp_input_mae_paths, + tmp_output_sdf_paths, + tmp_output_maegz_paths, + ) + + def _all_keywords(self) -> dict: + """Returns joined keywords from JSON and from .in file (if specified).""" + + keywords = {} + + # keywords from maestro file; they can be overwritten by explicitly set values from the "configuration" block + maestro_in_file = deepcopy( + self.settings.additional.get(_SGE.MAESTRO_IN_FILE, None) + ) + if maestro_in_file is not None: + with open(maestro_in_file[_SGE.MAESTRO_IN_FILE_PATH], "rt") as f: + keywords_from_file = self._parse_maestro_in_file(f.readlines()) + keywords.update(keywords_from_file) + + # Add keywords from advanced_glide_keywords + # (they are keywords with file paths), + # skipping keywords that are None. + # Also skip maestro file - that's not a keyword. + # TODO: This is legacy code from DockStream's implementation, which was necessary to accommodate the GUI. + # Remove? + # if self.parameters.advanced_glide_keywords is not None: + # adv_kw = stringify({ + # k: v + # for k, v in self.parameters.advanced_glide_keywords.dict().items() + # if v is not None and k not in {'maestro_file'} + # }) + # keywords.update(adv_kw) + + # Add "ordinary" keywords, overwriting existing ones. + json_keywords = stringify( + deepcopy(self.settings.additional.get(_SGE.CONFIGURATION, {})) + ) + keywords.update( + json_keywords + ) # Overwrites any keywords that are already present. + return keywords + + def _configuration_Maestro_reformat(self, configuration: dict): + # rewrite keyword input file in Maestro format + maestro_indent = " " + maestro_spacing = " " + + element_lines = [] + block_lines = [] + + for key in configuration.keys(): + if isinstance(configuration[key], str): + # keyword holds one dictionary (string) only + element_lines.append( + maestro_spacing.join([key, configuration[key] + "\n"]) + ) + elif isinstance(configuration[key], dict): + # keyword holds a composite block and has no dictionary (e.g. 
constraints); note, that these must
+                # always be at the end of the file
+                block_lines.append("\n" + key + "\n")
+                block = configuration[key]
+                for key_idx, block_key in enumerate(block.keys()):
+                    block_value = block[block_key]
+
+                    # if this is a value in certain blocks, put it into double quotation marks, as spaces are present
+                    if any([x in key for x in _GSE.GLIDE_INPUTBLOCK_VALUEQUOTED]):
+                        block_value = '"' + block_value + '"'
+                    line = maestro_indent + maestro_spacing.join(
+                        [block_key, block_value]
+                    )
+
+                    # add a comma to the block definition, if there are more lines to come and the block requires it;
+                    # note, that not all blocks in GLIDE require this; in some cases, the comma is already part of
+                    # the line (then skip it!)
+                    if any([x in key for x in _GSE.GLIDE_INPUTBLOCK_COMMASEPARATED]):
+                        if (key_idx + 1) < len(block) and line[-1] != ",":
+                            line = line + ","
+
+                    block_lines.append(line + "\n")
+            else:
+                raise Exception(
+                    f"Cannot handle type {type(configuration[key])} in configuration file specification, only use strings and blocks."
+                )
+
+        return element_lines, block_lines
+
+    def _write_configuration_to_file(self, configuration: dict, path: str):
+        """Function to generate a keyword input file in Maestro format."""
+
+        # call a function that returns the input keywords in Maestro format
+        element_lines, block_lines = self._configuration_Maestro_reformat(
+            configuration=configuration
+        )
+
+        # arrange the elements and blocks
+        if path is None:
+            _, path = gen_tmp_file(suffix=".in")
+        with open(path, mode="w") as f:
+            self._logger.log(f"Writing GLIDE input file {path}:\n", _LE.DEBUG)
+            for line in element_lines:
+                f.write(line)
+                self._logger_blank.log(line.rstrip("\n"), _LE.DEBUG)
+            for line in block_lines:
+                f.write(line)
+                self._logger_blank.log(line.rstrip("\n"), _LE.DEBUG)
+            self._logger_blank.log("", _LE.DEBUG)
+            self._logger.log("--- End file", _LE.DEBUG)
+
+    def _get_time_limit_per_task(self):
+        # for the "SP" method, it can be expected that about 90 s / ligand is required at most;
+        # use a bit extra
+        return int(self.settings.additional.get(_SGE.TIME_LIMIT_PER_TASK, 120))
+
+    def _get_path_tmp_results(
+        self, glide_pose_outtype: str, base_path: str
+    ) -> Tuple[str, str]:
+        if glide_pose_outtype == _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB:
+            path_tmp_results = os.path.join(
+                os.path.dirname(base_path),
+                "".join(
+                    [
+                        os.path.splitext(os.path.basename(base_path))[0],
+                        _SGE.GLIDE_SDF_DEFAULT_EXTENSION,
+                    ]
+                ),
+            )
+        elif glide_pose_outtype == _EE.GLIDE_POSE_OUTTYPE_POSEVIEWER:
+            path_tmp_results = os.path.join(
+                os.path.dirname(base_path),
+                "".join(
+                    [
+                        os.path.splitext(os.path.basename(base_path))[0],
+                        _SGE.GLIDE_MAEGZ_DEFAULT_EXTENSION,
+                    ]
+                ),
+            )
+        else:
+            raise NotImplementedError(
+                f"Specified out-type {glide_pose_outtype} for Glide not supported."
+            )
+
+        path_tmp_log = os.path.join(
+            os.path.dirname(base_path),
+            "".join([os.path.splitext(os.path.basename(base_path))[0], _SGE.GLIDE_LOG]),
+        )
+        return path_tmp_results, path_tmp_log
+
+    def _move_result_files(
+        self,
+        glide_pose_outtype: str,
+        path_tmp_results: str,
+        path_sdf_results: str,
+        path_maegz_results: str,
+    ):
+        if glide_pose_outtype == _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB:
+            if os.path.isfile(path_tmp_results):
+                with gzip.open(path_tmp_results, "rb") as fin:
+                    with open(path_sdf_results, "wb") as fout:
+                        shutil.copyfileobj(fin, fout)
+        elif glide_pose_outtype == _EE.GLIDE_POSE_OUTTYPE_POSEVIEWER:
+            # as the output is in MAEGZ format, we need to translate it into an SDF (and move the original file to the
+            # expected path)
+            self._translate_MAE_to_SDF(
+                mae_path=path_tmp_results,
+                sdf_path=path_sdf_results,
+                executor=self._schrodinger_executor,
+            )
+            os.rename(path_tmp_results, path_maegz_results)
+        else:
+            raise NotImplementedError(
+                f"Specified out-type {glide_pose_outtype} for Glide not supported."
+            )
+
+    def _run_subjob(
+        self,
+        mae_ligand_path,
+        path_sdf_results,
+        path_maegz_results,
+        tmp_output_dir,
+        grid_path,
+        sublist,
+    ):
+        # 1) increase the sublist "tries" and set the status to "failed"
+        _ = [task.increment_tries() for task in sublist]
+        _ = [task.set_status_failed() for task in sublist]
+
+        # 2) change to the directory, to be able to use relative paths (to compensate for a Schrodinger bug with AWS)
+        working_dir = os.getcwd()
+        os.chdir(tmp_output_dir)
+
+        # 3) get the "keywords" dictionary and overwrite the necessary values;
+        # add the "LIGANDFILE" keyword to the list of keywords: the full path to the "mae" formatted ligands
+        configuration = self._all_keywords()
+        if configuration is None:
+            raise ValueError(
+                "You need to specify at least the gridfile path in the configuration for Glide."
+            )
+        configuration[_EE.GLIDE_LIGANDFILE] = mae_ligand_path
+
+        # set the path to the grid file for this run
+        configuration[_EE.GLIDE_GRIDFILE] = grid_path
+
+        # if not set, set the ligand pose outtype to "LIGANDLIB" (SDF output without receptor)
+        glide_pose_outtype = configuration.get(
+            _EE.GLIDE_POSE_OUTTYPE, _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB
+        )
+        configuration[_EE.GLIDE_POSE_OUTTYPE] = glide_pose_outtype
+
+        # 4) write the keyword-input file for the "Glide" backend out to a temporary file
+        _, glide_configuration_path = gen_tmp_file(suffix=".in", dir=tmp_output_dir)
+        self._write_configuration_to_file(
+            configuration=configuration,
+            path=glide_configuration_path,
+        )
+
+        # 5) determine the expected result and log paths (used below to poll until the job is completed)
+        # Note, that while Glide has an option "-WAIT", this does not seem to work when getting back
+        # data from AWS (probably it ends before copying back the data properly); stay with this solution for now
+        path_tmp_results, path_tmp_log = self._get_path_tmp_results(
+            glide_pose_outtype=glide_pose_outtype, base_path=glide_configuration_path
+        )
+
+        # 6) execute the "Glide" backend
+        arguments = self._prepare_glide_arguments(glide_configuration_path)
+        execution_result = self._backend_executor.execute(
+            command=_EE.GLIDE,
+            arguments=arguments,
+            check=True,
+            location=os.path.dirname(glide_configuration_path),
+        )
+
+        # 7) check the return code (anything but '0' is bad) and add "stdout" to the log file
+        time_exceeded = False
+        if execution_result.returncode != 0:
+            msg = (
+                f"Could not dock with Glide, error message: {execution_result.stdout}."
+ ) + self._logger.log(msg, _LE.ERROR) + self._print_log_file(path_tmp_log) + raise RuntimeError() + else: + if ( + self._wait_until_file_generation( + path=path_tmp_results, + path_log=path_tmp_log, + interval_sec=10, + maximum_sec=max( + self._get_time_limit_per_task() * len(sublist), 300 + ), + success_strings=_EE.GLIDE_LOG_FINISHED_STRINGS, + fail_strings=_EE.GLIDE_LOG_FAIL_STRINGS, + ) + is False + ): + time_exceeded = True + self._logger.log( + f"Sublist docking for output file {path_tmp_results} exceeded time limit or failed, " + f"all these ligands are ignored in the final write-out. This could mean that none of " + f"them could be docked or a runtime error in Glide occured.", + _LE.DEBUG, + ) + + # 6) load the log-file (if generated) and check if all went well + if ( + any_in_file(path_tmp_log, _EE.GLIDE_LOG_SUCCESS_STRING) + and time_exceeded is False + ): + self._logger.log( + f"Finished sublist (input: {mae_ligand_path}, output: {path_sdf_results}).", + _LE.DEBUG, + ) + else: + self._print_log_file(path_tmp_log) + + # 7) collect the results; Glide outputs the sdf with a given, semi-hard-coded path; extract the sdf file + self._move_result_files( + glide_pose_outtype=glide_pose_outtype, + path_tmp_results=path_tmp_results, + path_sdf_results=path_sdf_results, + path_maegz_results=path_maegz_results, + ) + + # 8) revert back to working directory + os.chdir(working_dir) + + def _prepare_glide_arguments(self, glide_configuration_path: str) -> List[str]: + # Note, that the first argument is the path to the configuration input file + # If the number of cores has been set, overwrite "N_JOBS" and parallelize internally and also note + # that each subjob requires a license; instead start each with "N_JOBS" = 1 + arguments = [glide_configuration_path] + + # copy parameters and overwrite as necessary + parameters = deepcopy(self.settings.arguments.parameters) + parameters[_EE.GLIDE_NJOBS] = 1 + + if len(self.settings.arguments.flags) > 0: + for flag in self.settings.arguments.flags: + # -WAIT leads to issues at times: The process may not return properly + # (e.g. 
because of writing problems) and then gets stuck; workaround with waiting + # for file completion, so remove it if set + if flag not in [_EE.GLIDE_WAIT]: + arguments.append(str(flag)) + if parameters: + for key in parameters.keys(): + # remove "-WAIT" if set as a parameter, as this leads to instability issues and ignore empty keys + if key == _EE.GLIDE_WAIT or key == "": + continue + arguments.append(key) + if parameters[key] is not None and parameters[key] != "": + arguments.append(str(parameters[key])) + return arguments + + def _execute_glide(self, grid_id: str, grid_path: str): + # TODO: add individual resubmission for failed subtasks + # get number of sublists in batch and initialize Parallelizer + glide_parallelizer = Parallelizer(func=self._run_subjob) + + # continue until everything is successfully done or number of retries have been exceeded + while self._subtask_container.done() is False: + next_batch = self._get_sublists(get_first_n_lists=self._get_number_cores()) + + # generate paths and initialize molecules (so that if they fail, this can be covered) + ( + tmp_output_dirs, + tmp_input_mae_paths, + tmp_output_sdf_paths, + tmp_output_maegz_paths, + ) = self._generate_temporary_input_output_files(next_batch) + + # call "token guard" method (only executed, if block is specified in the configuration), which will wait + # with the execution if not enough tokens are available at the moment + self._apply_token_guard() + + # execute the current batch in parallel; hand over lists of parameters (will be handled by Parallelizer) + # also increment the tries and set the status to "failed" (don't do that inside subprocess, as data is + # copied, not shared!) + _ = [sub.increment_tries() for element in next_batch for sub in element] + _ = [sub.set_status_failed() for element in next_batch for sub in element] + list_grid_path = [grid_path for _ in tmp_input_mae_paths] + glide_parallelizer.execute_parallel( + mae_ligand_path=tmp_input_mae_paths, + path_sdf_results=tmp_output_sdf_paths, + path_maegz_results=tmp_output_maegz_paths, + tmp_output_dir=tmp_output_dirs, + grid_path=list_grid_path, + sublist=next_batch, + ) + + # parse the output of that particular batch and remove temporary files + self._parse_glide_output( + tmp_output_sdf_paths, + tmp_output_maegz_paths, + next_batch, + grid_id, + grid_path, + ) + + # clean-up + self._remove_temporary(tmp_output_dirs) + + # print the progress for this execution + self._log_execution_progress() + + def _log_execution(self, grid_id: str, number_grids: int): + number_enumerations = 0 + number_conformers = 0 + for compound in self.get_compounds(): + number_enumerations += len(compound) + for enumeration in compound: + number_conformers += len(enumeration) + if len(enumeration) == 0: + self._logger.log( + f"Enumeration {enumeration.get_index_string()} has no docked poses attached.", + _LE.DEBUG, + ) + self._logger.log( + f"Executed Schrodinger/Glide backend for grid {grid_id} (of {number_grids}), now storing a total of {number_conformers} conformers for {number_enumerations} enumerations in {len(self.get_compounds())} compounds.", + _LE.INFO, + ) + + def _parse_glide_output( + self, + tmp_output_sdf_paths: List[str], + tmp_output_maegz_paths: List[str], + batch: List[List[Subtask]], + grid_id: str, + grid_path: str, + ): + # TODO: refactor that (recombine with ligprep parsing?) 
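The execute-until-done pattern in _execute_glide reduces to the following minimal sketch; ToyTask, pending and run_batch are illustrative stand-ins for the Subtask/SubtaskContainer/Parallelizer machinery, not the real icolos API:

# Toy version of "pull sublists until the container is done or retries are exhausted".
class ToyTask:
    def __init__(self, payload, max_tries=3):
        self.payload, self.tries, self.done, self.max_tries = payload, 0, False, max_tries

def pending(tasks):
    return [t for t in tasks if not t.done and t.tries < t.max_tries]

def run_batch(batch):
    for task in batch:
        task.tries += 1                        # increment tries before running, as above
        task.done = task.payload % 2 == 0      # pretend even payloads dock successfully

tasks = [ToyTask(i) for i in range(6)]
while pending(tasks):
    run_batch(pending(tasks)[:4])              # batch size ~ number of cores
print([(t.payload, t.done, t.tries) for t in tasks])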
+ def _update_subtask(sublist: List[Subtask], enum_identifier: str): + for task in sublist: + if task.data.get_index_string() == enum_identifier: + task.set_status_success() + + def _add_poseviewer_file(conformer: Conformer, maegz_path: str): + if os.path.isfile(maegz_path) and os.path.getsize(maegz_path) > 0: + with open(maegz_path, "rb") as f: + conformer.add_extra_data( + key=_SGE.GLIDE_POSEVIEWER_FILE_KEY, data=f.read() + ) + + for i in range(len(tmp_output_sdf_paths)): + # get input and output paths and check the files are there + path_sdf_results = tmp_output_sdf_paths[i] + path_maegz_results = tmp_output_maegz_paths[i] + cur_sublist = batch[i] + + # this is a protection against the case where empty (file size == 0 bytes) files are generated due to + # a failure during docking + if ( + not os.path.isfile(path_sdf_results) + or os.path.getsize(path_sdf_results) == 0 + ): + continue + + mol_supplier = Chem.SDMolSupplier(path_sdf_results, removeHs=False) + for mol in mol_supplier: + if mol is None: + continue + cur_enumeration_name = str(mol.GetProp("_Name")) + + # add the information on the actual grid used + mol.SetProp(_SBE.ANNOTATION_GRID_ID, str(grid_id)) + mol.SetProp(_SBE.ANNOTATION_GRID_PATH, str(grid_path)) + mol.SetProp(_SBE.ANNOTATION_GRID_FILENAME, os.path.basename(grid_path)) + + # if no docking score is attached (i.e. the molecule is a receptor or so, skip it) + if self._set_docking_score(mol) is not True: + continue + + # add molecule to the appropriate ligand + for compound in self.get_compounds(): + for enumeration in compound: + if enumeration.get_index_string() == cur_enumeration_name: + new_conformer = Conformer( + conformer=mol, + conformer_id=None, + enumeration_object=enumeration, + ) + _add_poseviewer_file( + conformer=new_conformer, maegz_path=path_maegz_results + ) + enumeration.add_conformer(new_conformer, auto_update=True) + _update_subtask( + cur_sublist, enum_identifier=cur_enumeration_name + ) + break + + def _sort_conformers(self): + # sort the conformers (best to worst) and update their names to contain the conformer id + # -> :: + for compound in self.get_compounds(): + for enumeration in compound: + enumeration.sort_conformers( + by_tag=_SGE.GLIDE_DOCKING_SCORE, reverse=False + ) + + def execute(self): + # in order to be able to efficiently execute Glide on the enumeration level, all of them have to be unrolled + # Note: As they retain their respective Compound object, the attribution later on is simple + all_enumerations = [] + for compound in self.get_compounds(): + all_enumerations = all_enumerations + compound.get_enumerations() + for enumeration in compound: + enumeration.clear_conformers() + + # to allow ensemble docking, loop over all provided grid files and annotate the origin of the conformers + gridfiles = deepcopy(self.settings.additional.get(_SGE.CONFIGURATION, None))[ + _EE.GLIDE_GRIDFILE + ] + if not isinstance(gridfiles, list): + gridfiles = [gridfiles] + + # set grid ids (generate indices, if not specified) + grid_ids = self.settings.additional.get(_SBE.GRID_IDS, []) + if len(grid_ids) != len(gridfiles): + self._logger.log( + f"There were {len(grid_ids)} grid_ids specified for {len(gridfiles)}, using indices instead.", + _LE.DEBUG, + ) + grid_ids = [str(idx) for idx in range(len(gridfiles))] + + for grid_id, grid_path in zip(grid_ids, gridfiles): + # split into sublists, according to the settings + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + 
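The attribution logic in _parse_glide_output relies on Glide echoing each enumeration's index string back through the SDF "_Name" field. A minimal, self-contained illustration of that tag round-trip with RDKit (the "docking_score" tag name here is a placeholder, not the enum value used above):

from rdkit import Chem
from rdkit.Chem import AllChem

# write a pose whose "_Name" carries the compound:enumeration index string
mol = Chem.MolFromSmiles("CCO")
AllChem.Compute2DCoords(mol)
mol.SetProp("_Name", "0:1")               # index string, as used above
mol.SetProp("docking_score", "-7.3")      # placeholder score tag
writer = Chem.SDWriter("poses.sdf")
writer.write(mol)
writer.close()

# read it back and match on the name, as the parser above does
for pose in Chem.SDMolSupplier("poses.sdf", removeHs=False):
    if pose is not None:
        print(pose.GetProp("_Name"), pose.GetProp("docking_score"))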
self._subtask_container.load_data(all_enumerations) + + # execute Glide + self._execute_glide(grid_id=grid_id, grid_path=grid_path) + + # do the logging + self._log_execution(grid_id=grid_id, number_grids=len(gridfiles)) + + # sort the conformers loaded to the enumerations + self._sort_conformers() diff --git a/icolos/core/workflow_steps/schrodinger/ligprep.py b/icolos/core/workflow_steps/schrodinger/ligprep.py new file mode 100644 index 0000000..01cf55e --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/ligprep.py @@ -0,0 +1,322 @@ +import os +import tempfile +from typing import List + +from pydantic import BaseModel +from rdkit import Chem + +from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase +from icolos.utils.execute_external.ligprep import LigprepExecutor +from icolos.utils.general.files_paths import gen_tmp_file + +from icolos.utils.general.molecules import get_charge_for_molecule +from icolos.core.containers.compound import Enumeration, Conformer, get_compound_by_id + +from icolos.utils.enums.program_parameters import ( + LigprepEnum, +) +from icolos.utils.enums.step_enums import StepLigprepEnum +from icolos.core.workflow_steps.step import _LE, _CTE +from icolos.utils.general.parallelization import SubtaskContainer, Parallelizer, Subtask +from icolos.utils.general.print_log import print_log_file +from icolos.utils.smiles import to_smiles + +_EE = LigprepEnum() +_SLE = StepLigprepEnum() + + +class StepLigprep(StepSchrodingerBase, BaseModel): + """ + Interface to the LigPrep binary for ligand embedding + """ + + class Config: + underscore_attrs_are_private = True + + def __init__(self, **data): + super().__init__(**data) + + # initialize the executor and test availability + self._initialize_backend(executor=LigprepExecutor) + self._check_backend_availability() + + def _prepare_ligprep_arguments(self) -> list: + arguments_list = [] + + # add user-specified command-line settings (if provided); note, that empty dictionaries evaluate + # to False + if len(self.settings.arguments.flags) > 0: + for flag in self.settings.arguments.flags: + arguments_list.append(str(flag)) + if self.settings.arguments.parameters: + for key in self.settings.arguments.parameters.keys(): + if key == _EE.LIGPREP_F: + self._logger.log( + 'Removing "-f" parameter for Ligprep arguments - filter file settings need to be specified in the "additional" block directly.', + _LE.WARNING, + ) + continue + + arguments_list.append(key) + if ( + self.settings.arguments.parameters[key] is not None + and self.settings.arguments.parameters[key] != "" + ): + arguments_list.append(str(self.settings.arguments.parameters[key])) + + # add default settings, that are not exposed to the user yet + if _EE.LIGPREP_HOST not in arguments_list: + arguments_list.append(_EE.LIGPREP_HOST) + arguments_list.append(_EE.LIGPREP_HOST_LOCALHOST) + arguments_list.append(_EE.LIGPREP_WAIT) + arguments_list = arguments_list + [_EE.LIGPREP_NJOBS, 1] + + return arguments_list + + def _generate_temporary_input_output_files(self, batch: List[List[Subtask]]): + tmp_output_dirs = [] + tmp_input_smi_paths = [] + tmp_input_filter_paths = [] + tmp_output_sdf_paths = [] + dict_original_smiles = {} + + for next_subtask_list in batch: + # generate temporary input files and output directory + cur_tmp_output_dir = tempfile.mkdtemp() + _, cur_tmp_smi = gen_tmp_file(suffix=".smi", dir=cur_tmp_output_dir) + _, cur_tmp_filter = gen_tmp_file(suffix=".lff", dir=cur_tmp_output_dir) + + # write smiles to temporary file as "Ligprep" backend + 
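The write-out below follows the plain ".smi" convention, one "SMILES<space>identifier" record per line, so that Ligprep can echo the identifier back in its output; sketched stand-alone (the records are made up for illustration):

# each record: "<SMILES> <index string>"; the identifier is what later
# attributes results back to their enumeration
records = {"0:0": "CCO", "0:1": "c1ccccc1"}
with open("ligands.smi", "w") as smi_file:
    for index_string, smiles in records.items():
        smi_file.write(f"{smiles} {index_string}\n")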
with open(cur_tmp_smi, "w") as f: + for subtask in next_subtask_list: + enumeration = subtask.data + dict_original_smiles[ + enumeration.get_index_string() + ] = enumeration.get_original_smile() + f.write( + enumeration.get_original_smile() + + " " + + enumeration.get_index_string() + + "\n" + ) + + # add the path to which "_run_subjob()" will write the result SDF + _, output_sdf_path = gen_tmp_file( + suffix="_result.sdf", dir=cur_tmp_output_dir + ) + + # add the temporary paths + tmp_output_dirs.append(cur_tmp_output_dir) + tmp_input_smi_paths.append(cur_tmp_smi) + tmp_input_filter_paths.append(cur_tmp_filter) + tmp_output_sdf_paths.append(output_sdf_path) + return ( + tmp_output_dirs, + tmp_input_smi_paths, + tmp_output_sdf_paths, + tmp_input_filter_paths, + dict_original_smiles, + ) + + def _execute_ligprep(self): + # TODO: add individual resubmission for failed subtasks + # get the number of sublists in the batch and initialize the Parallelizer + ligprep_parallelizer = Parallelizer(func=self._run_subjob) + + # continue until everything is successfully done or the number of retries has been exceeded + while self._subtask_container.done() is False: + next_batch = self._get_sublists(get_first_n_lists=self._get_number_cores()) + + # generate paths and initialize molecules (so that if they fail, this can be covered) + ( + tmp_output_dirs, + tmp_input_smi_paths, + tmp_output_sdf_paths, + tmp_input_filter_paths, + dict_original_smiles, + ) = self._generate_temporary_input_output_files(next_batch) + + # call the "token guard" method (only executed if the block is specified in the configuration), which will wait + # with the execution if not enough tokens are available at the moment + self._apply_token_guard() + + # execute the current batch in parallel; hand over lists of parameters (will be handled by Parallelizer) + # also increment the tries and set the status to "failed" (don't do that inside the subprocess, as data is + # copied, not shared!)
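The "copied, not shared" caveat is a general property of process-based parallelism: each worker receives a pickled copy of its task, so state set inside the worker never reaches the parent. A minimal demonstration (illustrative, not icolos code):

import multiprocessing

def mark_done(task):
    task["done"] = True      # mutates the worker's copy only
    return task              # results must be returned explicitly

if __name__ == "__main__":
    tasks = [{"id": i, "done": False} for i in range(3)]
    with multiprocessing.Pool(processes=2) as pool:
        results = pool.map(mark_done, tasks)
    print([t["done"] for t in tasks])    # [False, False, False]
    print([r["done"] for r in results])  # [True, True, True]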
+ _ = [sub.increment_tries() for element in next_batch for sub in element] + _ = [sub.set_status_failed() for element in next_batch for sub in element] + ligprep_parallelizer.execute_parallel( + smi_ligand_path=tmp_input_smi_paths, + path_sdf_results=tmp_output_sdf_paths, + tmp_output_dir=tmp_output_dirs, + tmp_input_filter=tmp_input_filter_paths, + sublist=next_batch, + ) + + # parse the output of that particular batch and remove temporary files + self._parse_ligprep_output( + tmp_output_sdf_paths, dict_original_smiles, next_batch + ) + self._remove_temporary(tmp_output_dirs) + + # print the progress for this execution + self._log_execution_progress() + + def _parse_ligprep_output( + self, + tmp_output_sdf_paths: List[str], + dict_original_smiles: dict, + batch: List[List[Subtask]], + ): + # TODO: refactor that + def _update_subtask(sublist: List[Subtask], enum_identifier: str): + for task in sublist: + if task.data.get_index_string() == enum_identifier: + task.set_status_success() + + for i in range(len(tmp_output_sdf_paths)): + # get input and output paths and check the files are there + path_sdf_results = tmp_output_sdf_paths[i] + cur_sublist = batch[i] + if ( + not os.path.isfile(path_sdf_results) + or os.path.getsize(path_sdf_results) == 0 + ): + continue + + mol_supplier = Chem.SDMolSupplier(path_sdf_results, removeHs=False) + for mol in mol_supplier: + # Ligprep adds a "-1" to "-[N]" to the names in the variants tag; this tag is always added + # alternatively, the "_Name" property could be loaded + # TODO: add loading only the most likely tautomer here (based on _SLE.LIGPREP_TAUTOMER_PROBABILITY) + if mol is not None and mol.HasProp(_SLE.LIGPREP_VARIANTS): + identifier, _ = mol.GetProp(_SLE.LIGPREP_VARIANTS).split("-") + compound_id, enumeration_id = identifier.split(":") + compound = get_compound_by_id( + self.get_compounds(), int(compound_id) + ) + enumeration = Enumeration( + compound_object=compound, + smile=to_smiles(mol), + original_smile=dict_original_smiles[identifier], + molecule=mol, + ) + compound.add_enumeration(enumeration, auto_update=True) + _update_subtask(cur_sublist, enum_identifier=identifier) + else: + self._logger.log( + f"Skipped molecule when loading as specified property {_SLE.LIGPREP_VARIANTS} could not be found - typically, this indicates that ligprep could not embed the molecule.", + _LE.WARNING, + ) + + def _add_filtering(self, arguments: list, tmp_input_filter: str) -> list: + filter_file_settings = self.settings.additional.get(_SLE.FILTER_FILE, None) + if filter_file_settings is not None: + filter_file = open(tmp_input_filter, "w") + for key in filter_file_settings.keys(): + filter_file.write( + f"{key} {filter_file_settings[key]}\n" + ) + filter_file.close() + arguments = arguments + [_EE.LIGPREP_F, tmp_input_filter] + return arguments + + def _run_subjob( + self, + smi_ligand_path: str, + path_sdf_results: str, + tmp_output_dir: str, + tmp_input_filter: str, + sublist: List[Subtask], + ): + # 1) increase the sublist "tries" and set status to "failed" + _ = [task.increment_tries() for task in sublist] + _ = [task.set_status_failed() for task in sublist] + + # 2) change to directory, to be able to use relative paths (to compensate for Schrodinger bug with AWS) + working_dir = os.getcwd() + os.chdir(tmp_output_dir) + + # 3) prepare "Ligprep" arguments + arguments = self._prepare_ligprep_arguments() + arguments = self._add_filtering(arguments, tmp_input_filter) + arguments = arguments + [ + _EE.LIGPREP_INPUT_ISMI, + os.path.basename(smi_ligand_path), + ] 
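_add_filtering above turns the "additional" filter settings into a Ligprep filter file, one "KEY value" line per entry, handed over via the "-f" flag; a stand-alone sketch (the keys shown are placeholders, not verified Ligprep filter keywords):

# placeholder filter settings as they might appear in the step's "additional" block
filter_settings = {"Total_charge": "< 2", "Ring_count": "<= 4"}
with open("ligprep_filter.lff", "w") as lff:
    for key, value in filter_settings.items():
        lff.write(f"{key} {value}\n")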
+ arguments = arguments + [ + _EE.LIGPREP_OUTPUT_OSD, + os.path.basename(path_sdf_results), + ] + + # 4) run "Ligprep" backend and add log file to "debug" mode logging + result = self._backend_executor.execute( + command=_EE.LIGPREP, + arguments=arguments, + location=tmp_output_dir, + check=False, + ) + + self._logger.log( + f"Executed Ligprep backend (output file: {path_sdf_results}).", _LE.DEBUG + ) + path_tmp_log = os.path.join( + tmp_output_dir, + "".join( + [ + os.path.splitext(os.path.basename(path_sdf_results))[0], + _EE.LIGPREP_LOG_ENDING, + ] + ), + ) + print_log_file(path=path_tmp_log, logger=self._logger, level=_LE.DEBUG) + + # 5) revert back to working directory + os.chdir(working_dir) + + def _parse_ligprep_result( + self, sdf_output: str, enumeration: Enumeration + ) -> List[Conformer]: + charge = str( + get_charge_for_molecule(enumeration.get_molecule(), add_as_tag=False) + ) + mol_supplier = Chem.SDMolSupplier(sdf_output, removeHs=False) + conformers = [] + for mol_id, mol in enumerate(mol_supplier): + # note, that formal charge information would be kept if available before (i.e. it retains tags) + mol.SetProp(_CTE.FORMAL_CHARGE_TAG, charge) + conformers.append(Conformer(conformer=mol)) + return conformers + + def _log_execution(self, initial_enum_number: int): + number_enumerations_after = 0 + for compound in self.get_compounds(): + number_enumerations_after += len(compound.get_enumerations()) + self._logger.log( + f"Executed LigPrep for {initial_enum_number} input enumerations, resulting in {number_enumerations_after} output enumerations.", + _LE.INFO, + ) + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + self._logger.log( + f"Added enumeration {enumeration.get_index_string()} with smile {enumeration.get_smile()}.", + _LE.DEBUG, + ) + + def execute(self): + # in order to be able to efficiently execute Ligprep on the enumeration level, all of them have to be unrolled + # Note: As they retain their respective Compound object, the attribution later on is simple + all_enumerations = [] + for compound in self.get_compounds(): + all_enumerations = all_enumerations + compound.get_enumerations() + compound.clear_enumerations() + # TODO: we will use the "original_smile" of the enumeration to start the embedding; make sure it exists + + # split into sublists, according to the settings + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(all_enumerations) + + self._execute_ligprep() + self._log_execution(initial_enum_number=len(all_enumerations)) diff --git a/icolos/core/workflow_steps/schrodinger/macromodel.py b/icolos/core/workflow_steps/schrodinger/macromodel.py new file mode 100644 index 0000000..b67cda8 --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/macromodel.py @@ -0,0 +1,160 @@ +import os +import subprocess +from typing import Tuple, List + +from pydantic import BaseModel, PrivateAttr +from rdkit import Chem + +from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase +from icolos.utils.execute_external.macromodel import MacromodelExecutor + +from icolos.utils.general.molecules import get_charge_for_molecule + +from icolos.core.containers.compound import Enumeration, Conformer + +from icolos.utils.enums.program_parameters import ( + MacromodelEnum, +) +from icolos.utils.enums.step_enums import StepMacromodelEnum +from icolos.core.workflow_steps.step import _LE, _CTE +from icolos.core.step_utils.sdconvert_util import 
SDConvertUtil + +_EE = MacromodelEnum() +_MMSE = StepMacromodelEnum() + + +class StepMacromodel(StepSchrodingerBase, BaseModel): + class Config: + underscore_attrs_are_private = True + + _sdconvert_util = PrivateAttr() + + def __init__(self, **data): + super().__init__(**data) + + # initialize the executor and test availability + self._initialize_backend(executor=MacromodelExecutor) + self._check_backend_availability() + + # prepare sdconvert utility + self._sdconvert_util = SDConvertUtil( + prefix_execution=self.execution.prefix_execution, + binary_location=self.execution.binary_location, + ) + + # extend parameters with the COM file default, if not present + if _MMSE.COM_FILE not in self.settings.arguments.parameters.keys(): + self.settings.arguments.parameters[_MMSE.COM_FILE] = _MMSE.COM_FILE_DEFAULT + + def _execute_macromodel(self, com_file: str) -> subprocess.CompletedProcess: + self._logger.log( + f"Executing MacroModel backend for com_file {com_file}.", _LE.DEBUG + ) + arguments = [] + for key in self.settings.arguments.parameters.keys(): + # TODO: disentangle "special behaviour" for this key - move the com_file specification to a separate block + # in the configuration + if key != _MMSE.COM_FILE: + arguments.append(key) + arguments.append(str(self.settings.arguments.parameters[key])) + for flag in self.settings.arguments.flags: + arguments.append(str(flag)) + arguments.append(com_file) + self._apply_token_guard() + result = self._backend_executor.execute( + command=_EE.MACROMODEL, arguments=arguments, check=True + ) + return result + + def _set_formal_charge(self, parameters: dict, molecule: Chem.Mol) -> dict: + charge = get_charge_for_molecule(molecule) + parameters[_EE.XTB_CHRG] = charge + self._logger.log(f"Set charge for molecule to {charge}.", _LE.DEBUG) + return parameters + + def _prepare_file_paths(self, tmp_dir: str) -> Tuple[str, str, str]: + # generate the paths to the temporary files + mae_input = os.path.join(tmp_dir, _MMSE.MAE_INPUT) + mae_output = os.path.join(tmp_dir, _MMSE.MAE_OUTPUT) + sdf_output = os.path.join(tmp_dir, _MMSE.SDF_OUTPUT) + + return mae_input, mae_output, sdf_output + + def _prepare_settings_file(self, tmp_dir: str) -> str: + path_settings_file = os.path.join(tmp_dir, _MMSE.COM_FILE_PATH) + + # join the input and output paths (at the beginning of the COM file) and the + # settings from either the default or the configuration together + complete_com = "\n".join( + [ + os.path.join(tmp_dir, _MMSE.MAE_INPUT), + os.path.join(tmp_dir, _MMSE.MAE_OUTPUT), + self.settings.arguments.parameters[_MMSE.COM_FILE], + ] + ) + with open(path_settings_file, "w") as f: + f.writelines(complete_com) + return path_settings_file + + def _prepare_run_files( + self, tmp_dir: str, enumeration: Enumeration + ) -> Tuple[str, str, str, str, str]: + # generate the file paths (NOT populated yet) + mae_input, mae_output, sdf_output = self._prepare_file_paths(tmp_dir) + + # write the input SDF file and translate it into Schrodingers native MAE format + sdf_input = self._prepare_temp_input(tmp_dir, enumeration.get_molecule()) + self._sdconvert_util.sdf2mae(sdf_input, mae_input) + + # write out the settings file + com_file = self._prepare_settings_file(tmp_dir) + + return sdf_input, mae_input, mae_output, sdf_output, com_file + + def _parse_macromodel_result( + self, sdf_output: str, enumeration: Enumeration + ) -> List[Conformer]: + charge = str( + get_charge_for_molecule(enumeration.get_molecule(), add_as_tag=False) + ) + mol_supplier = Chem.SDMolSupplier(sdf_output, 
removeHs=False) + conformers = [] + for mol_id, mol in enumerate(mol_supplier): + # note, that formal charge information would be kept if available before (i.e. it retains tags) + mol.SetProp(_CTE.FORMAL_CHARGE_TAG, charge) + conformers.append(Conformer(conformer=mol)) + return conformers + + def execute(self): + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if not self._input_object_valid(enumeration): + continue + + # set up + tmp_dir = self._move_to_temp_dir() + + # get the paths to the MAE and SDF input and output files and the COM file (settings) + ( + sdf_input, + mae_input, + mae_output, + sdf_output, + com_file, + ) = self._prepare_run_files(tmp_dir=tmp_dir, enumeration=enumeration) + + # execute MacroModel, obtain the output SDF and switch back the working directory to what it was before + result = self._execute_macromodel(com_file=com_file) + self._sdconvert_util.mae2sdf(mae_file=mae_output, sdf_file=sdf_output) + self._restore_working_dir() + + # parse output + conformers = self._parse_macromodel_result(sdf_output, enumeration) + enumeration.clear_conformers() + enumeration.add_conformers(conformers=conformers, auto_update=True) + self._logger.log( + f"Executed MacroModel and obtained {len(conformers)} conformers for enumeration {enumeration.get_index_string()}.", + _LE.INFO, + ) + + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/schrodinger/prepwizard.py b/icolos/core/workflow_steps/schrodinger/prepwizard.py new file mode 100644 index 0000000..3db17fa --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/prepwizard.py @@ -0,0 +1,90 @@ +from icolos.utils.enums.step_enums import StepGromacsEnum, StepPrepwizEnum +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum +from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase +from icolos.utils.execute_external.schrodinger import SchrodingerExecutor +from icolos.core.containers.generic import GenericData +from pydantic import BaseModel +from copy import deepcopy +import os + +_SEE = SchrodingerExecutablesEnum() +_SGE = StepGromacsEnum() +_SPE = StepPrepwizEnum() + + +class StepPrepwizard(StepSchrodingerBase, BaseModel): + """ + Interface to Schrodinger's PrepWizard program for protein prep + """ + + def __init__(self, **data): + super().__init__(**data) + self._initialize_backend(executor=SchrodingerExecutor) + self._check_backend_availability() + + def _parse_args(self): + parameters = deepcopy(self.settings.arguments.parameters) + arguments = [] + if len(self.settings.arguments.flags) > 0: + for flag in self.settings.arguments.flags: + arguments.append(str(flag)) + if parameters: + for key in parameters.keys(): + arguments.append(key) + if parameters[key] is not None and parameters[key] != "": + arguments.append(str(parameters[key])) + input_file = self.data.generic.get_file_names_by_extension("pdb")[0] + output_file = input_file # write to the same file name to keep things tidy + arguments.append(input_file) + arguments.append(output_file) + return arguments + + def _parse_output(self, tmp_dir: str): + output_pdb = os.path.join( + tmp_dir, self.data.generic.get_file_names_by_extension("pdb")[0] + ) + with open(output_pdb, "r") as f: + data = f.read() + self.data.generic.clear_file_dict() + output_file = GenericData(file_name=_SGE.COMPLEX_PDB, file_data=data) + self.data.generic.add_file(output_file) + + def _remove_ligand(self, tmp_dir): + remove_res = self.settings.additional[_SPE.REMOVE_RES] + pdb_file = 
self.data.generic.get_argument_by_extension("pdb") + cleaned_pdb_lines = [] + # handle the ligand removal mode: strip ligands, leave cofactors + if remove_res != _SPE.LIGANDS and not isinstance(remove_res, list): + # wrap a single residue name into a list (list() would split the string into characters) + remove_res = [remove_res] + + with open(os.path.join(tmp_dir, pdb_file), "r") as f: + if remove_res == _SPE.LIGANDS: + # automatically remove ligands, keep cofactors that are specified in the enum. + for line in f.readlines(): + if line is not None and ( + line.split()[0] == "ATOM" + or any(l in line for l in _SPE.COFACTOR_IDS) + ): + cleaned_pdb_lines.append(line) + else: + for line in f.readlines(): + if not any(l in line for l in remove_res): + cleaned_pdb_lines.append(line) + + with open(os.path.join(tmp_dir, pdb_file), "w") as f: + f.writelines(cleaned_pdb_lines) + + def execute(self): + tmp_dir = self._make_tmpdir() + args = self._parse_args() + self.data.generic.write_out_all_files(tmp_dir) + if ( + _SPE.REMOVE_RES in self.settings.additional.keys() + and self.settings.additional[_SPE.REMOVE_RES] is not None + ): + self._remove_ligand(tmp_dir) + self._backend_executor.execute( + command=_SEE.PREPWIZARD, arguments=args, check=True, location=tmp_dir + ) + + self._parse_output(tmp_dir) diff --git a/icolos/core/workflow_steps/schrodinger/prime.py b/icolos/core/workflow_steps/schrodinger/prime.py new file mode 100644 index 0000000..0081f8a --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/prime.py @@ -0,0 +1,239 @@ +import os + +from pydantic import BaseModel, PrivateAttr +from rdkit import Chem +from copy import deepcopy + +from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase +from icolos.utils.execute_external.prime import PrimeExecutor +from icolos.utils.execute_external.schrodinger import SchrodingerExecutor + +from icolos.core.containers.compound import Conformer + +from icolos.utils.enums.program_parameters import PrimeEnum, SchrodingerExecutablesEnum +from icolos.utils.enums.step_enums import StepPrimeEnum, StepGlideEnum +from icolos.core.workflow_steps.step import _LE +from icolos.core.step_utils.sdconvert_util import SDConvertUtil +from icolos.core.step_utils.structcat_util import StructcatUtil +from icolos.utils.general.files_paths import gen_tmp_file +from icolos.utils.general.parallelization import SubtaskContainer, Parallelizer +from tempfile import mkdtemp + +_SPE = StepPrimeEnum() +_PE = PrimeEnum() +_SEE = SchrodingerExecutablesEnum() +_SGE = StepGlideEnum() + + +class StepPrime(StepSchrodingerBase, BaseModel): + """ + Interface to Schrodinger's Prime MM-GBSA implementation + """ + + _schrodinger_executor: SchrodingerExecutor = None + + class Config: + underscore_attrs_are_private = True + + _sdconvert_util = PrivateAttr() + _structcat_util = PrivateAttr() + + def __init__(self, **data): + super().__init__(**data) + + # initialize the executor and test availability + self._initialize_backend(executor=PrimeExecutor) + self._check_backend_availability() + self._schrodinger_executor = SchrodingerExecutor( + prefix_execution=self.execution.prefix_execution + ) + + # prepare sdconvert utility + self._sdconvert_util = SDConvertUtil( + prefix_execution=self.execution.prefix_execution, + binary_location=self.execution.binary_location, + ) + + # prepare structcat utility + self._structcat_util = StructcatUtil( + prefix_execution=self.execution.prefix_execution, + binary_location=self.execution.binary_location, + ) + + def _execute_prime(self): + # note that as the output file name cannot be set (an "-out.maegz" will be attached), this
does + # not need to be heeded here and is encoded in the fixed file name strings + + prime_parallelizer = Parallelizer(func=self._run_subjob) + n = 1 + + while self._subtask_container.done() is False: + + next_batch = self._get_sublists(get_first_n_lists=self._get_number_cores()) + + # generate lists for the next batch + tmp_dirs, complex_paths, output_sdf_paths = self._prepare_batch(next_batch) + + self._apply_token_guard() + + _ = [sub.increment_tries() for element in next_batch for sub in element] + _ = [sub.set_status_failed() for element in next_batch for sub in element] + + self._logger.log(f"Executing Prime for batch {n}", _LE.DEBUG) + + prime_parallelizer.execute_parallel( + complex_path=complex_paths, + sdf_output=output_sdf_paths, + tmp_output_dir=tmp_dirs, + ) + + self._parse_prime_output( + complex_paths, tmp_dirs, output_sdf_paths, next_batch + ) + n += 1 + + def _parse_prime_output(self, complex_paths, tmp_dirs, output_sdf_paths, batch): + # go through the batch, get the info from the output file and attach the scores to the matching conformers + scores = [] + for i in range(len(output_sdf_paths)): + cur_sublist = batch[i] + sdf_path = output_sdf_paths[i] + curr_enum = None + curr_conformer = None + mol_supplier = Chem.SDMolSupplier(sdf_path, removeHs=False) + for mol in mol_supplier: + # check whether the name corresponds to an enum or conformer + identifier = str(mol.GetProp("_Name")) + is_enum = len(identifier.split(":")) == 2 + if ( + not is_enum + ): # if we are dealing with a conformer, drop the conformer index to get the enum id + enum_index = ":".join(list(mol.GetProp("_Name").split(":"))[:-1]) + else: + enum_index = identifier + # extract the enumeration object, regardless of whether we're dealing with a conformer or an enumeration + prime_score = mol.GetProp(_SPE.MMGBSA_SCORE) + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if enumeration.get_index_string() == enum_index: + curr_enum = enumeration + # if we have a conformer, find the conformer with the right ID and append the score to the existing object + if not is_enum: + assert curr_enum is not None + for conformer in curr_enum.get_conformers(): + if conformer.get_index_string() == identifier: + curr_conformer = conformer + + else: + # if scoring an enumeration, create and attach a conformer out of it + curr_conformer = Conformer(conformer=mol) + + curr_enum.add_conformer(curr_conformer) + + assert curr_conformer is not None + # now that we have the conformer from the originals, set the Prime score + curr_conformer.get_molecule().SetProp(_SPE.MMGBSA_SCORE, prime_score) + self._logger.log( + f"Calculated dG Bind of {prime_score} for conformer {curr_conformer.get_index_string()}", + _LE.INFO, + ) + scores.append(prime_score) + + # after parsing, remove the directories + self._remove_temporary(tmp_dirs) + + # set success status + for sublist in batch: + for task in sublist: + task.set_status_success() + + def _prepare_batch(self, batch): + # generate input files for the batch and return tmpdirs + + tmp_dirs = [] + complex_paths = [] + output_sdf_paths = [] + for next_subtask_list in batch: + tmp_dir = mkdtemp() + _, tmp_input_sdf_file = gen_tmp_file(suffix=".sdf", dir=tmp_dir) + _, tmp_input_mae_file = gen_tmp_file(suffix=".maegz", dir=tmp_dir) + _, tmp_output_sdf_file = gen_tmp_file(suffix=".sdf", dir=tmp_dir) + writer = Chem.SDWriter(tmp_input_sdf_file) + for subtask in next_subtask_list: + mol = deepcopy(subtask.data.get_molecule()) + conf_id = subtask.data.get_index_string() + mol.SetProp("_Name",
conf_id) + writer.write(mol) + writer.close() + + # now we have an sdf file with all the conformers from that batch; attach the + # receptor with structcat to produce the complex in MAE format + structcat_args = [ + "-imae", + self.settings.additional[_SPE.RECEPTOR], + "-isd", + tmp_input_sdf_file, + "-omae", + tmp_input_mae_file, + ] + self._schrodinger_executor.execute( + command=_SEE.STRUCTCAT, + arguments=structcat_args, + location=tmp_dir, + check=True, + ) + + tmp_dirs.append(tmp_dir) + complex_paths.append(tmp_input_mae_file) + output_sdf_paths.append(tmp_output_sdf_file) + + return tmp_dirs, complex_paths, output_sdf_paths + + def _run_subjob(self, complex_path, sdf_output, tmp_output_dir): + + work_dir = os.getcwd() + os.chdir(tmp_output_dir) + + arguments = [complex_path, _PE.PRIME_OUTTYPE, _PE.PRIME_OUTTYPE_LIGAND] + for key in self.settings.arguments.parameters.keys(): + if key not in [_PE.PRIME_OUTTYPE, _PE.PRIME_NJOBS]: + arguments.append(key) + arguments.append(str(self.settings.arguments.parameters[key])) + for flag in self.settings.arguments.flags: + arguments.append(str(flag)) + if _PE.PRIME_WAIT not in arguments: + arguments.append(_PE.PRIME_WAIT) + + result = self._backend_executor.execute( + command=_PE.PRIME_MMGBSA, + arguments=arguments, + check=True, + location=tmp_output_dir, + ) + + output_file = os.path.splitext(complex_path)[0] + "-out.maegz" + assert os.path.isfile(output_file) + # convert the MAE ligand output back to SDF + self._sdconvert_util.mae2sdf(output_file, sdf_output) + os.chdir(work_dir) + return result + + def execute(self): + # unwrap the conformers so they can be scored efficiently in parallel: create lists of subtasks, + # each with their own files and tmpdirs, then execute them in parallel + all_conformers = [] + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if enumeration.get_conformers(): + # the default running mode is to score incoming conformers without changing their configurations + for conformer in enumeration.get_conformers(): + all_conformers.append(conformer) + else: + all_conformers.append(enumeration) + + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(all_conformers) + self._execute_prime() + self._logger.log( + f"Executed Prime for {len(all_conformers)} conformers", _LE.DEBUG + ) diff --git a/icolos/core/workflow_steps/step.py b/icolos/core/workflow_steps/step.py new file mode 100644 index 0000000..657258c --- /dev/null +++ b/icolos/core/workflow_steps/step.py @@ -0,0 +1,494 @@ +import time + +from icolos.core.containers.generic import GenericContainer, GenericData +import multiprocessing +import shutil +import tempfile +from typing import Callable, List, Dict, Tuple + +from pydantic import BaseModel, PrivateAttr +from rdkit import Chem +from copy import deepcopy +import os + + +from icolos.core.step_utils.input_preparator import ( + StepData, + InputPreparator, + StepInputParameters, +) +from icolos.loggers.steplogger import StepLogger +from icolos.loggers.blank_logger import BlankLogger +from icolos.utils.enums.step_enums import StepGromacsEnum +from icolos.core.containers.compound import Compound, Conformer +from icolos.core.step_utils.step_writeout import ( + StepWriteoutParameters, + WriteOutHandler, + _SBE, +) +from icolos.utils.enums.execution_enums import ExecutionResourceEnum +from icolos.utils.execute_external.execute import Executor +from icolos.utils.general.icolos_exceptions import StepFailed + +from icolos.utils.enums.compound_enums import CompoundTagsEnum +from
icolos.utils.enums.logging_enums import LoggingConfigEnum +from icolos.utils.enums.write_out_enums import WriteOutEnum +from icolos.utils.general.files_paths import gen_tmp_file, any_in_file +from icolos.utils.general.parallelization import SubtaskContainer, Subtask +from tempfile import mkdtemp +from distutils.dir_util import copy_tree +from icolos.core.containers.compound import unroll_enumerations, unroll_conformers +from icolos.utils.general.progress_bar import get_progress_bar_string + +_LE = LoggingConfigEnum() +_WE = WriteOutEnum() +_CTE = CompoundTagsEnum() +_SGE = StepGromacsEnum() +_ERE = ExecutionResourceEnum + + +class StepFailurePolicyParameters(BaseModel): + n_tries: int = 1 + retry_wait_seconds: int = 10 + + +class StepExecutionResourceParameters(BaseModel): + partition: _ERE = _ERE.CORE + time: str = "12:00:00" + gres: str = None + mem: str = "64g" + cores: int = 8 + modules: List = [] + other_args: dict = {} + + +class StepExecutionParameters(BaseModel): + class StepExecutionParallelizationParameters(BaseModel): + cores: int = 1 + max_length_sublists: int = None + + prefix_execution: str = None + binary_location: str = None + pipe_input: str = None + parallelization: StepExecutionParallelizationParameters = ( + StepExecutionParallelizationParameters() + ) + failure_policy: StepFailurePolicyParameters = StepFailurePolicyParameters() + check_backend_availability: bool = False + job_control: StepExecutionResourceParameters = StepExecutionResourceParameters() + resource: _ERE = _ERE.LOCAL + + +class StepSettingsArgsParameters(BaseModel): + flags: List = [] + parameters: Dict = {} + + +class StepSettingsParameters(BaseModel): + arguments: StepSettingsArgsParameters = StepSettingsArgsParameters() + additional: Dict = {} + + +class StepBase(BaseModel): + step_id: str + work_dir: str = None + type: str = None + data: StepData = StepData() + input: StepInputParameters = StepInputParameters() + writeout: List[StepWriteoutParameters] = [] + execution: StepExecutionParameters = StepExecutionParameters() + settings: StepSettingsParameters = StepSettingsParameters() + + class Config: + underscore_attrs_are_private = True + + _logger = PrivateAttr() + _logger_blank = PrivateAttr() + _old_wdir = PrivateAttr() + _workflow_object = PrivateAttr() + _backend_executor: Executor = PrivateAttr() + _subtask_container: SubtaskContainer = PrivateAttr() + + def __init__(self, **data): + super().__init__(**data) + + self._logger_blank = BlankLogger() + self._old_wdir = os.getcwd() + self._workflow_object = None + self._backend_executor = None + + self._logger = StepLogger() + self._logger_blank = BlankLogger() + + # @staticmethod + def _make_tmpdir(self): + if self.work_dir is not None: + return self.work_dir + else: + self.work_dir = tempfile.mkdtemp() + return self.work_dir + + def _remove_temporary(self, paths): + if paths is not None: + if not isinstance(paths, list): + paths = [paths] + if ( + self.get_workflow_object() is None + or self.get_workflow_object().header.global_settings.remove_temporary_files + ): + for path in paths: + if os.path.isdir(path): + shutil.rmtree(path, ignore_errors=True) + elif os.path.isfile(path) and os.path.exists(path): + os.remove(path) + else: + self._logger.log( + f"Path {path} is neither a valid folder nor file path.", + _LE.WARNING, + ) + else: + self._logger.log( + f"Keeping {len(paths)} temporary file(s) / folder(s): {', '.join(paths)}", + _LE.DEBUG, + ) + + @staticmethod + def _move_to_temp_dir() -> str: + cur_tmp_dir = tempfile.mkdtemp() + 
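Since the step models above are plain pydantic classes, a JSON step definition validates directly into typed settings. A reduced sketch using the same field layout (toy class names; pydantic v1 style, as used throughout this patch):

from typing import Dict, List
from pydantic import BaseModel

class ToyArguments(BaseModel):
    flags: List = []
    parameters: Dict = {}

class ToySettings(BaseModel):
    arguments: ToyArguments = ToyArguments()
    additional: Dict = {}

config = {"arguments": {"flags": ["-WAIT"], "parameters": {"-NJOBS": 1}},
          "additional": {"grid_ids": ["apo"]}}
settings = ToySettings(**config)
print(settings.arguments.parameters)  # {'-NJOBS': 1}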
os.chdir(cur_tmp_dir) + return cur_tmp_dir + + @staticmethod + def _move_to_dir(path: str): + os.chdir(path) + + def _restore_working_dir(self): + os.chdir(self._old_wdir) + + def execute(self): + raise NotImplementedError + + def get_compound_by_name(self, name: str) -> Compound: + for compound in self.data.compounds: + if compound.get_name() == name: + return compound + + def get_compounds(self) -> List[Compound]: + return self.data.compounds + + def get_generic(self) -> GenericContainer: + return self.data.generic + + def clone_compounds(self) -> List[Compound]: + return [deepcopy(comp) for comp in self.data.compounds] + + def process_write_out(self): + # TODO: process generic data write-out + for writeout in self.writeout: + writeout_handler = WriteOutHandler(config=writeout) + writeout_handler.set_data(self.data) + writeout_handler.write() + + def get_compound_stats(self) -> Tuple[int, int, int]: + n_comp = len(self.get_compounds()) + n_enum = len(unroll_enumerations(self.get_compounds())) + n_conf = len(unroll_conformers(self.get_compounds())) + return n_comp, n_enum, n_conf + + def generate_input(self): + preparator = InputPreparator( + workflow=self.get_workflow_object(), logger=self._logger + ) + self.data, self.work_dir = preparator.generate_input( + step_input=self.input, step_type=self.type + ) + + # check for a perturbation map for fep workflows + self._logger.log( + f"Loaded {len(self.data.compounds)} compounds and {len(self.data.generic.get_flattened_files())} generic data fields for step {self.get_step_id()}.", + _LE.DEBUG, + ) + + def set_workflow_object(self, workflow_object): + self._workflow_object = workflow_object + + def get_workflow_object(self): + return self._workflow_object + + def get_step_id(self) -> str: + return self.step_id + + def set_step_id(self, step_id: str): + self.step_id = step_id + + def _initialize_backend(self, executor: Callable): + if self.execution.resource == _ERE.SLURM: + self._backend_executor = executor( + prefix_execution=self.execution.prefix_execution, + binary_location=self.execution.binary_location, + cores=self.execution.job_control.cores, + partition=self.execution.job_control.partition, + time=self.execution.job_control.time, + mem=self.execution.job_control.mem, + modules=self.execution.job_control.modules, + other_args=self.execution.job_control.other_args, + gres=self.execution.job_control.gres, + ) + else: + + self._backend_executor = executor( + prefix_execution=self.execution.prefix_execution, + binary_location=self.execution.binary_location, + ) + + def _unroll_compounds( + self, + compounds: List[Compound], + level: str = _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS, + ) -> List[Conformer]: + # TODO: move this to step_base or merge with methods from compound itself + + all_conformers = [] + for comp in compounds: + for enum in comp.get_enumerations(): + if level == _SBE.WRITEOUT_COMP_CATEGORY_ENUMERATIONS: + all_conformers.append(enum) + elif level == _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS: + for conf in enum: + all_conformers.append(conf) + return all_conformers + + def write_conformers(self, path: str): + """Convenience function for frequent conformer coordinate write-out. 
Better to use the WriteOutHandler class.""" + compounds_copy = self.clone_compounds() + params = { + _SBE.WRITEOUT_CONFIG: { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: path, + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_SDF, + }, + } + } + writeout_handler = WriteOutHandler(**params) + writeout_handler.set_data(StepData(compounds=compounds_copy)) + writeout_handler.write() + + def write_generic_by_extension(self, path: str, ext: str, join=True): + """writes all files of a specific file type to the specified directory, retaining original files names""" + for file in self.data.generic.get_files_by_extension(ext): + file.write(path, join=join) + + def write_generic_by_name(self, path, name: str): + file = self.data.generic.get_file_by_name(name) + file.write(path) + + def _check_backend_availability(self, strict=True): + if self._backend_executor is None: + raise Exception( + "Cannot check backend availability before initialization is complete." + ) + + if self.execution.check_backend_availability: + if not self._backend_executor.is_available(): + if strict: + raise StepFailed( + f"Cannot initialize backend for step {self.step_id} - abort." + ) + else: + self._logger.log( + f"Backend availability check failed, proceeding anyways.", + _LE.WARNING, + ) + else: + self._logger.log(f"Checked backend availability - valid.", _LE.DEBUG) + + def _input_object_valid(self, obj) -> bool: + if obj.get_molecule() is None or not isinstance(obj.get_molecule(), Chem.Mol): + self._logger.log( + f"Object {obj.get_index_string()} skipped - no valid molecule.", + _LE.WARNING, + ) + return False + return True + + def _input_object_empty(self, obj) -> bool: + if obj.empty(): + self._logger.log( + f"Object {obj.get_index_string()} is skipped (empty).", _LE.WARNING + ) + return True + return False + + # TODO: REMOVE THIS FUNCTION (see: write_molecule_to_sdf()) + def _prepare_temp_input(self, tmp_dir: str, molecule: Chem.Mol) -> str: + _, tmp_sdf_path = gen_tmp_file(suffix=".sdf", dir=tmp_dir) + if molecule is None or not isinstance(molecule, Chem.Mol): + raise ValueError( + "Function requires input attribute to be an RDkit molecule." 
+ ) + writer = Chem.SDWriter(tmp_sdf_path) + writer.write(molecule) + writer.close() + self._logger.log(f"Wrote input molecule to file {tmp_sdf_path}.", _LE.DEBUG) + return tmp_sdf_path + + def _get_sublists(self, get_first_n_lists: int = None) -> List[List[Subtask]]: + number_cores = self._get_number_cores() + + # decide how to slice the ligand list depending on whether a maximum length is defined or not + if self.execution.parallelization.max_length_sublists is not None: + slice_size = min( + max(self.execution.parallelization.max_length_sublists, 1), + len(self._subtask_container), + ) + return self._subtask_container.get_sublists( + partitions=None, + slice_size=slice_size, + get_first_n_lists=get_first_n_lists, + ) + else: + # split the ligands into as many cores as available + partitions = min(number_cores, len(self._subtask_container)) + return self._subtask_container.get_sublists( + partitions=partitions, + slice_size=None, + get_first_n_lists=get_first_n_lists, + ) + + def _get_number_cores(self): + # prepare the parallelization and set the number of cores to be used + cores = self.execution.parallelization.cores + if cores == 0: + cores = 1 + elif cores < 0: + # subtract the number of cores (neg. value, thus add up) from total number of cores, e.g. -1 will + # use all available cores minus 1 + cores = multiprocessing.cpu_count() + cores + return cores + + def _print_log_file(self, path: str): + if os.path.isfile(path): + with open(path, "r") as log_file: + log_file_raw = log_file.readlines() + self._logger.log(f"Printing log file {path}:\n", _LE.DEBUG) + for line in log_file_raw: + self._logger_blank.log(line.rstrip("\n"), _LE.DEBUG) + self._logger_blank.log("", _LE.DEBUG) + self._logger.log("--- End file", _LE.DEBUG) + + def _add_data_to_generic(self, file, data, extension=None): + """Write data from arbitrary file to generic container class""" + file_name = file.split("/")[-1] + # file types where they can be passed as arguments in a subsequent step + # TODO: this is not maintainable! + file_tag = ( + True + if file.endswith((".gro", "topol.top", "tpr", "fmp", "edge")) + else False + ) + file = GenericData( + file_name=file_name, file_data=data, argument=file_tag, extension=extension + ) + self.data.generic.add_file(file) + + def _parse_output( + self, + tmp_dir, + exclusion_list=( + "#", + "AC", + "AC0", + "INF", + "hashed", + "metadata", + "timekeys", + "000000", + ), + ): + """Generic method for parsing generic writeout, can be overwritten in child classes""" + self.data.generic.clear_file_dict() + file_list = [os.path.join(tmp_dir, f) for f in os.listdir(tmp_dir)] + for file in file_list: + if os.path.isfile(file) and not file.endswith(exclusion_list): + try: + with open(file, "r") as f: + data = f.read() + binary = False + except UnicodeDecodeError: + with open(file, "rb") as f: + data = f.read() + binary = True + # work out if we handle the data or just the path to it on disk + file_size = os.stat(file).st_size + if file_size > float(_SBE.FILE_SIZE_THRESHOLD.value): + # do not write to the dict - file is too large to store in memory + _, tmp_path = gen_tmp_file(suffix="." 
+ str(file).split(".")[-1]) + self._logger.log( + f"Large file detected, storing at {tmp_path}", _LE.INFO + ) + if binary: + with open(tmp_path, "wb") as f: + f.write(data) + else: + with open(tmp_path, "w") as f: + f.write(data) + data = tmp_path + + self._add_data_to_generic(file, data) + self._logger.log(f"Stored data for file {file}", _LE.DEBUG) + elif os.path.isdir(file): + tmp_dir = mkdtemp() + copy_tree(file, tmp_dir) + self._add_data_to_generic(file=file, data=tmp_dir, extension="dir") + + # we have picked up a directory, we want the entire contents copied somewhere + + def _wait_until_file_generation( + self, + path, + path_log=None, + interval_sec=1, + maximum_sec=None, + success_strings: set = set(), + fail_strings: set = set(), + ) -> bool: + # TODO: Refactor that without breaking the Glide dependency. + counter = 0 + while not os.path.exists(path): + # wait for an interval + time.sleep(interval_sec) + counter = counter + 1 + + # if a Glide logfile path has been specified, check, whether critical messages indicating an abort are there + # note, that we return "True" to indicate that the "file generation" has nevertheless been completed + if path_log is not None: + if any_in_file(path_log, fail_strings): + self._logger.log( + f"A critical error occurred in sublist execution.", _LE.WARNING + ) + self._print_log_file(path_log) + return True + if any_in_file(path_log, success_strings): + # log file indicates job is done; give a bit of leeway to ensure the writing is done + time.sleep(3) + break + + # if there's time left, proceed + if maximum_sec is not None and counter * interval_sec >= maximum_sec: + break + if os.path.exists(path): + return True + else: + return False + + def _log_execution_progress(self): + number_tasks_done = len(self._subtask_container.get_done_tasks()) + number_tasks_total = len(self._subtask_container.subtasks) + self._logger.log( + get_progress_bar_string(number_tasks_done, number_tasks_total, length=65), + _LE.INFO, + ) diff --git a/icolos/core/workflow_steps/structure_prediction/__init__.py b/icolos/core/workflow_steps/structure_prediction/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/structure_prediction/disicl.py b/icolos/core/workflow_steps/structure_prediction/disicl.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/structure_prediction/dssp.py b/icolos/core/workflow_steps/structure_prediction/dssp.py new file mode 100644 index 0000000..9a5151e --- /dev/null +++ b/icolos/core/workflow_steps/structure_prediction/dssp.py @@ -0,0 +1,56 @@ +from typing import List +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.execute_external.execute import Executor +from pydantic import BaseModel +from icolos.utils.enums.step_enums import StepDSSPEnum +from icolos.utils.enums.program_parameters import DSSPEnum +import os + + +_SDE = StepDSSPEnum() +_DE = DSSPEnum() + + +class StepDSSP(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + self._initialize_backend(executor=Executor) + + def _construct_arguments(self, tmp_dir: str, file: str) -> List: + args = [] + for flag in self.settings.arguments.flags: + args.append(flag) + for key, value in self.settings.arguments.parameters.items(): + args.append(key) + args.append(value) + + # set the input and output files + args.append(file) + output = f"dssp_output_{file.split('.')[0]}.txt" + args.append(output) + return args + + def _parse_output(self, tmp_dir: str) -> None: + for file in 
[f for f in os.listdir(tmp_dir) if f.endswith("txt")]: + with open(os.path.join(tmp_dir, file), "r") as f: + self._add_data_to_generic(file, f.read()) + + def execute(self): + """ + Executes DSSP on a set of input structures + """ + + tmp_dir = self._make_tmpdir() + self.data.generic.write_out_all_files(tmp_dir) + + file_list = self.data.generic.get_file_names_by_extension(ext="pdb") + + for file in file_list: + arguments = self._construct_arguments(tmp_dir, file) + self._backend_executor.execute( + command=_DE.MKDSSP, arguments=arguments, check=True, location=tmp_dir + ) + + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/structure_prediction/pdb_fixer.py b/icolos/core/workflow_steps/structure_prediction/pdb_fixer.py new file mode 100644 index 0000000..9d46ed0 --- /dev/null +++ b/icolos/core/workflow_steps/structure_prediction/pdb_fixer.py @@ -0,0 +1,66 @@ +# implement pdbfixer as a FOSS alternative to proteinprep +from icolos.utils.enums.step_enums import StepPdbFixerEnum +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.program_parameters import PdbFixerEnum +from icolos.utils.execute_external.execute import Executor +from pydantic import BaseModel +from pdbfixer.pdbfixer import PDBFixer +import os + + +_SFE = StepPdbFixerEnum() +_FE = PdbFixerEnum() + + +class StepPdbFixer(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + self._initialize_backend(executor=Executor) + + def _parse_arguments(self): + default_flags = [ + "--replace-nonstandard", + "--add-residues", + ] + default_params = { + "--ph": "7.0", + "--add-atoms": "all", + "--keep-heterogens": "all", + } + arguments = [] + for arg in self.settings.arguments.flags: + arguments.append(arg) + for key, value in self.settings.arguments.parameters.items(): + formatted_arg = f"{key}={value}" + arguments.append(formatted_arg) + for key in default_flags: + if key not in self.settings.arguments.flags: + arguments.append(key) + for key, value in default_params.items(): + if key not in self.settings.arguments.parameters.keys(): + formatted_arg = f"{key}={value}" + arguments.append(formatted_arg) + return arguments + + def execute(self): + + tmp_dir = self._make_tmpdir() + + self.data.generic.write_out_all_files(tmp_dir) + pdb_files = self.data.generic.get_file_names_by_extension("pdb") + + arguments = self._parse_arguments() + + for file in pdb_files: + path = os.path.join(tmp_dir, file) + # build a fresh argument list for each file; extending the shared list would + # accumulate input paths and "--output" flags across iterations + file_arguments = [path] + arguments + ["--output", path] + + self._backend_executor.execute( + command=_FE.FIXER, arguments=file_arguments, location=tmp_dir, check=True + ) + + self._parse_output(tmp_dir) + + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/structure_prediction/peptide_embedder.py b/icolos/core/workflow_steps/structure_prediction/peptide_embedder.py new file mode 100644 index 0000000..f0b6a53 --- /dev/null +++ b/icolos/core/workflow_steps/structure_prediction/peptide_embedder.py @@ -0,0 +1,36 @@ +from pydantic import BaseModel +from icolos.core.workflow_steps.step import StepBase +from Bio import SeqIO +from Bio.PDB import PDBIO +import os +import PeptideBuilder +from PeptideBuilder import Geometry + + +class StepPeptideEmbedder(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + def execute(self): + # use the PeptideBuilder python library to build a rough peptide structure using + # sensible psi/phi angles etc., for subsequent simulation + +
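The PeptideBuilder calls used in the loop below can be exercised stand-alone; this sketch builds a four-residue peptide with the library's default backbone geometry (the sequence is arbitrary):

from Bio.PDB import PDBIO
import PeptideBuilder
from PeptideBuilder import Geometry

# one Geometry per residue, carrying sensible default phi/psi angles
geometries = [Geometry.geometry(aa) for aa in "ACDE"]
structure = PeptideBuilder.make_structure_from_geos(geometries)

io = PDBIO()
io.set_structure(structure)
io.save("peptide.pdb")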
diff --git a/icolos/core/workflow_steps/structure_prediction/peptide_embedder.py b/icolos/core/workflow_steps/structure_prediction/peptide_embedder.py
new file mode 100644
index 0000000..f0b6a53
--- /dev/null
+++ b/icolos/core/workflow_steps/structure_prediction/peptide_embedder.py
@@ -0,0 +1,36 @@
+from pydantic import BaseModel
+from icolos.core.workflow_steps.step import StepBase
+from Bio import SeqIO
+from Bio.PDB import PDBIO
+import os
+import PeptideBuilder
+from PeptideBuilder import Geometry
+
+
+class StepPeptideEmbedder(StepBase, BaseModel):
+    def __init__(self, **data):
+        super().__init__(**data)
+
+    def execute(self):
+        # use the PeptideBuilder python library to build a rough peptide structure
+        # with sensible psi/phi angles etc., for subsequent simulation
+
+        tmp_dir = self._make_tmpdir()
+        self.data.generic.write_out_all_files(tmp_dir)
+        # extract the peptide sequences from the provided FASTA file
+        fasta_file = self.data.generic.get_argument_by_extension("fasta")
+
+        sequences = list(SeqIO.parse(os.path.join(tmp_dir, fasta_file), format="fasta"))
+
+        for idx, seq in enumerate(sequences):
+            geom = [Geometry.geometry(aa) for aa in seq.seq]
+            structure = PeptideBuilder.make_structure_from_geos(geom)
+
+            out = PDBIO()
+            out.set_structure(structure)
+            # TODO: find a better naming strategy than this
+            out.save(os.path.join(tmp_dir, f"sequence_{idx}.pdb"))
+
+        self._parse_output(tmp_dir)
+
+        self._remove_temporary(tmp_dir)
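PeptideBuilder assembles the chain from per-residue Geometry objects, so backbone dihedrals can be adjusted before the structure is built. A minimal sketch, assuming the Geometry attributes phi and psi_im1 from the PeptideBuilder documentation (sequence and angle values are hypothetical):

    from Bio.PDB import PDBIO
    import PeptideBuilder
    from PeptideBuilder import Geometry

    geos = []
    for aa in "ACDEF":          # arbitrary example sequence
        geo = Geometry.geometry(aa)
        geo.phi = -120.0        # force an extended backbone
        geo.psi_im1 = 140.0
        geos.append(geo)
    structure = PeptideBuilder.make_structure_from_geos(geos)
    io = PDBIO()
    io.set_structure(structure)
    io.save("extended_ACDEF.pdb")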
diff --git a/icolos/core/workflow_steps/structure_prediction/rosetta_abinitio.py b/icolos/core/workflow_steps/structure_prediction/rosetta_abinitio.py
new file mode 100644
index 0000000..385fdbb
--- /dev/null
+++ b/icolos/core/workflow_steps/structure_prediction/rosetta_abinitio.py
@@ -0,0 +1,15 @@
+from icolos.core.workflow_steps.step import StepBase
+from pydantic import BaseModel
+from icolos.utils.execute_external.rosetta import RosettaExecutor
+
+# Wrapper for Rosetta ab initio structure prediction.
+# Note that execution is finicky and requires proper setup: we run everything
+# locally, since public web servers are not an option for us, and some
+# configuration is needed to get make_fragments.pl to run with all its
+# dependencies in place.
+
+
+class StepRosettaAbinitio(StepBase, BaseModel):
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        self._initialize_backend(executor=RosettaExecutor)
diff --git a/icolos/loggers/__init__.py b/icolos/loggers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/icolos/loggers/agentlogger.py b/icolos/loggers/agentlogger.py
new file mode 100644
index 0000000..a77754f
--- /dev/null
+++ b/icolos/loggers/agentlogger.py
@@ -0,0 +1,12 @@
+import logging
+
+from icolos.loggers.base_logger import BaseLogger
+
+
+class AgentLogger(BaseLogger):
+    def __init__(self):
+        super().__init__()
+
+    def _initialize_logger(self):
+        logger = logging.getLogger(self._LE.LOGGER_AGENT)
+        return logger
diff --git a/icolos/loggers/base_logger.py b/icolos/loggers/base_logger.py
new file mode 100644
index 0000000..a3e0254
--- /dev/null
+++ b/icolos/loggers/base_logger.py
@@ -0,0 +1,27 @@
+from abc import ABC, abstractmethod
+
+from icolos.utils.enums.logging_enums import LoggingConfigEnum
+
+
+class BaseLogger(ABC):
+    def __init__(self):
+        self._LE = LoggingConfigEnum()
+        self._logger = self._initialize_logger()
+
+    def log(self, message: str, level: str):
+        if level == self._LE.DEBUG:
+            self._logger.debug(message)
+        elif level == self._LE.INFO:
+            self._logger.info(message)
+        elif level == self._LE.WARNING:
+            self._logger.warning(message)
+        elif level == self._LE.ERROR:
+            self._logger.error(message)
+        elif level == self._LE.EXCEPTION:
+            self._logger.exception(message)
+        else:
+            raise ValueError("Logger level not supported.")
+
+    @abstractmethod
+    def _initialize_logger(self):
+        raise NotImplementedError("Overwrite this method in child classes.")
diff --git a/icolos/loggers/blank_logger.py b/icolos/loggers/blank_logger.py
new file mode 100644
index 0000000..71d3708
--- /dev/null
+++ b/icolos/loggers/blank_logger.py
@@ -0,0 +1,14 @@
+import logging
+
+from icolos.loggers.base_logger import BaseLogger
+
+
+class BlankLogger(BaseLogger):
+    """This logger serves as a "verbatim" interface."""
+
+    def __init__(self):
+        super().__init__()
+
+    def _initialize_logger(self):
+        logger = logging.getLogger(self._LE.LOGGER_BLANK)
+        return logger
diff --git a/icolos/loggers/entrypoint_logger.py b/icolos/loggers/entrypoint_logger.py
new file mode 100644
index 0000000..8406660
--- /dev/null
+++ b/icolos/loggers/entrypoint_logger.py
@@ -0,0 +1,12 @@
+import logging
+
+from icolos.loggers.base_logger import BaseLogger
+
+
+class EntryPointLogger(BaseLogger):
+    def __init__(self):
+        super().__init__()
+
+    def _initialize_logger(self):
+        logger = logging.getLogger(self._LE.LOGGER_ENTRYPOINT)
+        return logger
diff --git a/icolos/loggers/iologger.py b/icolos/loggers/iologger.py
new file mode 100644
index 0000000..86cec06
--- /dev/null
+++ b/icolos/loggers/iologger.py
@@ -0,0 +1,12 @@
+import logging
+
+from icolos.loggers.base_logger import BaseLogger
+
+
+class IOLogger(BaseLogger):
+    def __init__(self):
+        super().__init__()
+
+    def _initialize_logger(self):
+        logger = logging.getLogger(self._LE.LOGGER_IO)
+        return logger
diff --git a/icolos/loggers/logger_utils.py b/icolos/loggers/logger_utils.py
new file mode 100644
index 0000000..6133f15
--- /dev/null
+++ b/icolos/loggers/logger_utils.py
@@ -0,0 +1,4 @@
+def log_multiline_string(logger, level: str, multi_line_string: str):
+    lines = multi_line_string.split("\n")
+    for line in lines:
+        logger.log(line, level)
diff --git a/icolos/loggers/steplogger.py b/icolos/loggers/steplogger.py
new file mode 100644
index 0000000..a83d64e
--- /dev/null
+++ b/icolos/loggers/steplogger.py
@@ -0,0 +1,12 @@
+import logging
+
+from icolos.loggers.base_logger import BaseLogger
+
+
+class StepLogger(BaseLogger):
+    def __init__(self):
+        super().__init__()
+
+    def _initialize_logger(self):
+        logger = logging.getLogger(self._LE.LOGGER_STEP)
+        return logger
diff --git a/icolos/scripts/__init__.py b/icolos/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/icolos/scripts/cli.py b/icolos/scripts/cli.py
new file mode 100644
index 0000000..8331502
--- /dev/null
+++ b/icolos/scripts/cli.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import json
+import argparse
+
+from icolos.core.composite_agents.workflow import WorkFlow
+
+from icolos.loggers.entrypoint_logger import EntryPointLogger
+
+from icolos.utils.enums.composite_agents_enums import WorkflowEnum
+from icolos.utils.enums.logging_enums import LoggingConfigEnum
+from icolos.utils.enums.entry_points import ExecutorEnum
+
+from icolos.utils.entry_point_functions.logging_helper_functions import (
+    initialize_logging,
+)
+from icolos.utils.entry_point_functions.parsing_functions import parse_header
+from icolos.utils.general.files_paths import attach_root_path
+
+
+class IcolosCLI:
+    def __init__(self) -> None:
+        # enums
+        _LE = LoggingConfigEnum()
+        _EE = ExecutorEnum()
+        _WE = WorkflowEnum()
+
+        # initialize logger
+        logger = EntryPointLogger()
+
+        # get the input parameters and parse them
+        parser = argparse.ArgumentParser(
+            description='Implements entry point for the "Icolos" workflow class.'
+        )
+        parser.add_argument(
+            "-conf",
+            type=str,
+            default=None,
+            help="A path to a workflow's configuration file (JSON dictionary) that is to be executed.",
+        )
+        parser.add_argument(
+            "-debug",
+            action="store_true",
+            help="Set this flag to activate the inbuilt debug logging mode (this overrides the default logging configuration).",
+        )
+        parser.add_argument(
+            "--global_variables",
+            nargs="+",
+            default=None,
+            type=str,
+            help='List of strings, setting global variables with key and value, e.g. "root:/path/to/root".',
+        )
+        parser.add_argument(
+            "--global_settings",
+            nargs="+",
+            default=None,
+            type=str,
+            help='List of strings, setting global settings with key and value, e.g. "remove_temporary:False".',
+        )
+        args, args_unk = parser.parse_known_args()
+
+        if args.conf is None or not os.path.isfile(args.conf):
+            raise Exception(
+                'Parameter "-conf" must be a relative or absolute path to a configuration (JSON) file.'
+            )
+
+        # load configuration
+        with open(args.conf) as file:
+            conf = file.read().replace("\r", "").replace("\n", "")
+            conf = json.loads(conf)
+
+        # set the logging configuration according to parameters
+        log_conf = attach_root_path(_LE.PATH_CONFIG_DEFAULT)
+        if args.debug:
+            log_conf = attach_root_path(_LE.PATH_CONFIG_DEBUG)
+        logger = initialize_logging(log_conf_path=log_conf, workflow_conf=conf)
+
+        # update global variables and settings
+        conf = parse_header(
+            conf=conf,
+            args=args,
+            entry_point_path=os.path.realpath(__file__),
+            logger=logger,
+        )
+
+        # generate workflow object
+        workflow = WorkFlow(**conf[_WE.WORKFLOW])
+        workflow.initialize()
+
+        # execute the whole workflow
+        workflow.execute()
+
+        sys.exit(0)
+
+
+def entry_point():
+    IcolosCLI()
+
+
+if __name__ == "__main__":
+    entry_point()
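For reference, the entry point above reduces to a handful of library calls. A minimal programmatic sketch that skips logging setup and header parsing ("workflow.json" stands in for any Icolos configuration file):

    import json

    from icolos.core.composite_agents.workflow import WorkFlow

    with open("workflow.json") as f:
        conf = json.load(f)

    workflow = WorkFlow(**conf["workflow"])  # the "workflow" block, cf. WorkflowEnum
    workflow.initialize()
    workflow.execute()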
"root:/path/to/root".', + ) + parser.add_argument( + "--global_settings", + nargs="+", + default=None, + type=str, + help='List of strings, setting global settings with key and value, e.g. "remove_temporary:False".', + ) + args, args_unk = parser.parse_known_args() + + if args.conf is None or not os.path.isfile(args.conf): + raise Exception( + 'Parameter "-conf" must be a relative or absolute path to a configuration (JSON) file.' + ) + + # load configuration + with open(args.conf) as file: + conf = file.read().replace("\r", "").replace("\n", "") + conf = json.loads(conf) + + # set the logging configuration according to parameters + log_conf = attach_root_path(_LE.PATH_CONFIG_DEFAULT) + if args.debug: + log_conf = attach_root_path(_LE.PATH_CONFIG_DEBUG) + logger = initialize_logging(log_conf_path=log_conf, workflow_conf=conf) + + # update global variables and settings + conf = parse_header( + conf=conf, + args=args, + entry_point_path=os.path.realpath(__file__), + logger=logger, + ) + + # generate workflow object + workflow = WorkFlow(**conf[_WE.WORKFLOW]) + workflow.initialize() + + # execute the whole workflow + workflow.execute() + + sys.exit(0) + + +def entry_point(): + IcolosCLI() + + +if __name__ == "__main__": + entry_point() diff --git a/icolos/utils/__init__.py b/icolos/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/utils/constants.py b/icolos/utils/constants.py new file mode 100644 index 0000000..77dffc8 --- /dev/null +++ b/icolos/utils/constants.py @@ -0,0 +1,2 @@ +CONSTANT_T = 298 # temperature in Kelvin +CONSTANT_KB = 0.00198720425864 # Boltzmann constant in kcal / mol / Kelvin diff --git a/icolos/utils/entry_point_functions/__init__.py b/icolos/utils/entry_point_functions/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/utils/entry_point_functions/logging_helper_functions.py b/icolos/utils/entry_point_functions/logging_helper_functions.py new file mode 100644 index 0000000..46dce4d --- /dev/null +++ b/icolos/utils/entry_point_functions/logging_helper_functions.py @@ -0,0 +1,29 @@ +import json +import logging.config as logging_config +from icolos.loggers.entrypoint_logger import EntryPointLogger +from icolos.utils.enums.composite_agents_enums import WorkflowEnum +from icolos.utils.enums.logging_enums import LoggingConfigEnum + +from icolos.utils.general.convenience_functions import * + +_WE = WorkflowEnum() +_LE = LoggingConfigEnum() + + +def initialize_logging(log_conf_path: str, workflow_conf: dict) -> EntryPointLogger: + with open(log_conf_path, "r") as f: + log_conf_dict = json.load(f) + header = nested_get(workflow_conf, [_WE.WORKFLOW, _WE.HEADER], default={}) + if in_keys(header, [_WE.LOGGING, _WE.LOGGING_LOGFILE]): + try: + log_conf_dict["handlers"]["file_handler"]["filename"] = nested_get( + header, [_WE.LOGGING, _WE.LOGGING_LOGFILE], None + ) + log_conf_dict["handlers"]["file_handler_blank"]["filename"] = nested_get( + header, [_WE.LOGGING, _WE.LOGGING_LOGFILE], None + ) + except KeyError: + pass + logging_config.dictConfig(log_conf_dict) + logger = EntryPointLogger() + return logger diff --git a/icolos/utils/entry_point_functions/parsing_functions.py b/icolos/utils/entry_point_functions/parsing_functions.py new file mode 100644 index 0000000..adcccd4 --- /dev/null +++ b/icolos/utils/entry_point_functions/parsing_functions.py @@ -0,0 +1,75 @@ +import os + +from icolos.loggers.base_logger import BaseLogger + +from icolos.utils.enums.composite_agents_enums import WorkflowEnum +from 
diff --git a/icolos/utils/entry_point_functions/__init__.py b/icolos/utils/entry_point_functions/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/icolos/utils/entry_point_functions/logging_helper_functions.py b/icolos/utils/entry_point_functions/logging_helper_functions.py
new file mode 100644
index 0000000..46dce4d
--- /dev/null
+++ b/icolos/utils/entry_point_functions/logging_helper_functions.py
@@ -0,0 +1,29 @@
+import json
+import logging.config as logging_config
+from icolos.loggers.entrypoint_logger import EntryPointLogger
+from icolos.utils.enums.composite_agents_enums import WorkflowEnum
+from icolos.utils.enums.logging_enums import LoggingConfigEnum
+
+from icolos.utils.general.convenience_functions import *
+
+_WE = WorkflowEnum()
+_LE = LoggingConfigEnum()
+
+
+def initialize_logging(log_conf_path: str, workflow_conf: dict) -> EntryPointLogger:
+    with open(log_conf_path, "r") as f:
+        log_conf_dict = json.load(f)
+    header = nested_get(workflow_conf, [_WE.WORKFLOW, _WE.HEADER], default={})
+    if in_keys(header, [_WE.LOGGING, _WE.LOGGING_LOGFILE]):
+        try:
+            log_conf_dict["handlers"]["file_handler"]["filename"] = nested_get(
+                header, [_WE.LOGGING, _WE.LOGGING_LOGFILE], None
+            )
+            log_conf_dict["handlers"]["file_handler_blank"]["filename"] = nested_get(
+                header, [_WE.LOGGING, _WE.LOGGING_LOGFILE], None
+            )
+        except KeyError:
+            pass
+    logging_config.dictConfig(log_conf_dict)
+    logger = EntryPointLogger()
+    return logger
diff --git a/icolos/utils/entry_point_functions/parsing_functions.py b/icolos/utils/entry_point_functions/parsing_functions.py
new file mode 100644
index 0000000..adcccd4
--- /dev/null
+++ b/icolos/utils/entry_point_functions/parsing_functions.py
@@ -0,0 +1,75 @@
+import os
+
+from icolos.loggers.base_logger import BaseLogger
+
+from icolos.utils.enums.composite_agents_enums import WorkflowEnum
+from icolos.utils.enums.entry_points import ExecutorEnum
+from icolos.utils.enums.logging_enums import LoggingConfigEnum
+
+_WE = WorkflowEnum()
+_LE = LoggingConfigEnum()
+_EE = ExecutorEnum()
+
+
+def parse_global(g_input, logger: BaseLogger) -> dict:
+    if g_input is not None:
+        if not isinstance(g_input, list):
+            g_input = [g_input]
+        g_vars = {}
+        for new_var in g_input:
+            parts = new_var.split(":")
+            if len(parts) != 2:
+                logger.log(
+                    f"Ignoring global input {new_var} set on the command line, as it must have exactly one key and one value, separated by ':'.",
+                    _LE.WARNING,
+                )
+                continue
+            g_vars[parts[0]] = parts[1]
+            logger.log(
+                f'Parsed global input "{parts[0]}" (value: "{parts[1]}").', _LE.DEBUG
+            )
+        return g_vars
+    else:
+        return {}
+
+
+def add_global(configuration: dict, g_vars: dict, field: str) -> dict:
+    """This function adds (and overwrites) values for global settings and variables. Parameter "field" selects
+    which key is to be used in the header region."""
+    header = configuration[_WE.WORKFLOW][_WE.HEADER]
+    if field not in header.keys():
+        header[field] = {}
+    for key, value in g_vars.items():
+        header[field][key] = value
+    return configuration
+
+
+def get_runtime_global_variables(args_conf: str, entry_point_path: str) -> dict:
+    return {  # current workdir
+        _EE.RUNTIME_GLOBAL_VARIABLE_WORKDIR: os.getcwd(),
+        # directory where the entry point lies
+        _EE.RUNTIME_GLOBAL_VARIABLE_ENTRYPOINTDIR: os.path.dirname(entry_point_path),
+        # directory where the JSON lies
+        _EE.RUNTIME_GLOBAL_VARIABLE_CONFIGDIR: os.path.dirname(
+            os.path.abspath(args_conf)
+        ),
+    }
+
+
+def parse_header(conf: dict, args, entry_point_path: str, logger: BaseLogger) -> dict:
+    # parse global variables from the command line
+    global_vars_CLI = parse_global(g_input=args.global_variables, logger=logger)
+    conf = add_global(conf, global_vars_CLI, _WE.GLOBAL_VARIABLES)
+
+    # add run-specific global variables (the current directory, the JSON's directory, ...)
+    conf = add_global(
+        conf,
+        get_runtime_global_variables(args.conf, entry_point_path),
+        _WE.GLOBAL_VARIABLES,
+    )
+
+    # update global settings; if they are not supported, pydantic will complain later on
+    # TODO: at the moment the implementation ignores stuff that is not understood (e.g.
when a typo occurs); this should fail + global_settings_CLI = parse_global(g_input=args.global_settings, logger=logger) + conf = add_global(conf, global_settings_CLI, _WE.GLOBAL_SETTINGS) + return conf diff --git a/icolos/utils/enums/__init__.py b/icolos/utils/enums/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/utils/enums/composite_agents_enums.py b/icolos/utils/enums/composite_agents_enums.py new file mode 100644 index 0000000..7f56949 --- /dev/null +++ b/icolos/utils/enums/composite_agents_enums.py @@ -0,0 +1,59 @@ +class BaseAgentEnum: + + HEADER = "header" + STEPS = "steps" + + # header + # --------- + ID = "id" + DESCRIPTION = "description" + GLOBAL_VARIABLES = "global_variables" + GLOBAL_SETTINGS = "global_settings" + LOGGING = "logging" + LOGGING_LOGFILE = "logfile" + + # exporting environment variables + ENVIRONMENT = "environment" + ENVIRONMENT_EXPORT = "export" + ENVIRONMENT_EXPORT_KEY = "key" + ENVIRONMENT_EXPORT_VALUE = "value" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class WorkflowEnum(BaseAgentEnum): + + WORKFLOW = "workflow" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class SchedulerEnum(BaseAgentEnum): + + SCHEDULER = "scheduler" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") diff --git a/icolos/utils/enums/compound_enums.py b/icolos/utils/enums/compound_enums.py new file mode 100644 index 0000000..46025c5 --- /dev/null +++ b/icolos/utils/enums/compound_enums.py @@ -0,0 +1,56 @@ +class CompoundTagsEnum: + + CONFORMER_ENERGY_TAG = "conformer_energy" + FORMAL_CHARGE_TAG = "formal_charge" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class CompoundContainerEnum: + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class EnumerationContainerEnum: + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class ConformerContainerEnum: + + EXTRA_DATA_COSMOFILE = "cosmo_file" + EXTRA_DATA_COORDFILE = "coord_file" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") diff --git a/icolos/utils/enums/entry_points.py b/icolos/utils/enums/entry_points.py new file mode 100644 index 
0000000..d912d52 --- /dev/null +++ b/icolos/utils/enums/entry_points.py @@ -0,0 +1,15 @@ +class ExecutorEnum: + + RUNTIME_GLOBAL_VARIABLE_WORKDIR = "work_dir" + RUNTIME_GLOBAL_VARIABLE_ENTRYPOINTDIR = "entrypoint_dir" + RUNTIME_GLOBAL_VARIABLE_CONFIGDIR = "config_dir" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") diff --git a/icolos/utils/enums/execution_enums.py b/icolos/utils/enums/execution_enums.py new file mode 100644 index 0000000..9a51932 --- /dev/null +++ b/icolos/utils/enums/execution_enums.py @@ -0,0 +1,13 @@ +from enum import Enum + + +class ExecutionResourceEnum(str, Enum): + LOCAL = "local" + SLURM = "slurm" + PARTITION = "partition" + TIME = "time" + GRES = "gres" + MEM = "mem" + CORES = "cores" + CORE = "core" + GPU = "gpu" diff --git a/icolos/utils/enums/flow_control_enums.py b/icolos/utils/enums/flow_control_enums.py new file mode 100644 index 0000000..50b7093 --- /dev/null +++ b/icolos/utils/enums/flow_control_enums.py @@ -0,0 +1,15 @@ +from icolos.core.workflow_steps.prediction.active_learning import StepActiveLearning +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.core.flow_control.iterator import StepIterator + +_SBE = StepBaseEnum + + +class FlowControlInitializationEnum: + # These steps are responsible for initializing other steps as part of their execution + # Keep these separate to the main pool of steps to avoid circular imports + + FLOW_CONTROL_INIT_DICT = { + _SBE.STEP_ITERATOR: StepIterator, + _SBE.STEP_ACTIVE_LEARNING: StepActiveLearning, + } diff --git a/icolos/utils/enums/general_utils_enums.py b/icolos/utils/enums/general_utils_enums.py new file mode 100644 index 0000000..1244ee4 --- /dev/null +++ b/icolos/utils/enums/general_utils_enums.py @@ -0,0 +1,15 @@ +class CheckFileGenerationEnum: + + GENERATED_SUCCESS = "generated_success" + GENERATED_EMPTY = "generated_empty" + NOT_GENERATED = "not_generated" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") diff --git a/icolos/utils/enums/input_enums.py b/icolos/utils/enums/input_enums.py new file mode 100644 index 0000000..425f9b8 --- /dev/null +++ b/icolos/utils/enums/input_enums.py @@ -0,0 +1,19 @@ +class InputEnum: + + SOURCE_FIELD_COMPOUNDS = "compounds" + TARGET_FIELD_COMPOUNDS = "compounds" + TARGET_FIELD_CONFORMERS = "conformers" + + # REINVENT-compatible JSON input + JSON_NAMES = "names" + JSON_SMILES = "smiles" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") diff --git a/icolos/utils/enums/logging_enums.py b/icolos/utils/enums/logging_enums.py new file mode 100644 index 0000000..fc26240 --- /dev/null +++ b/icolos/utils/enums/logging_enums.py @@ -0,0 +1,31 @@ +class LoggingConfigEnum: + + # set levels (for now, they match to the "logging" default ones) + DEBUG = "debug" + INFO = "info" + WARNING = "warning" + ERROR = "error" + EXCEPTION = "exception" + + # paths to the configuration JSONs that are shipped with Icolos + PATH_CONFIG_DEFAULT = 
"icolos/config/logging/default.json" + PATH_CONFIG_VERBOSE = "icolos/config/logging/verbose.json" + PATH_CONFIG_DEBUG = "icolos/config/logging/debug.json" + PATH_CONFIG_TUTORIAL = "icolos/config/logging/tutorial.json" + + # high-level loggers defined in the configurations + LOGGER_IO = "io" + LOGGER_STEP = "step" + LOGGER_AGENT = "agent" + LOGGER_ENTRYPOINT = "entrypoint" + LOGGER_BLANK = "blank" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") diff --git a/icolos/utils/enums/parallelization.py b/icolos/utils/enums/parallelization.py new file mode 100644 index 0000000..56022f1 --- /dev/null +++ b/icolos/utils/enums/parallelization.py @@ -0,0 +1,18 @@ +from enum import Enum + + +class ParallelizationEnum(str, Enum): + + STATUS_READY = "ready" + STATUS_SUCCESS = "success" + STATUS_FAILED = "failed" + + # try to find the internal value and return + # def __getattr__(self, name): + # if name in self: + # return name + # raise AttributeError + + # # prohibit any attempt to set any values + # def __setattr__(self, key, value): + # raise ValueError("No changes allowed.") diff --git a/icolos/utils/enums/program_parameters.py b/icolos/utils/enums/program_parameters.py new file mode 100644 index 0000000..b3d6675 --- /dev/null +++ b/icolos/utils/enums/program_parameters.py @@ -0,0 +1,1428 @@ +class TurbomoleEnum: + + # general + # --------- + COORD = "coord" # hard-coded file name of input coordinates + CONTROL = "control" # hard-coded file name for control script + TM_CONFIG_DIR = "tm_config_dir" # directory path where the *.tm configurations lie + # the basename of the parameter set chosen, e.g. "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge" + TM_CONFIG_BASENAME = "tm_config_basename" + # this does not contain the charge or the ending + # full name e.g. 
"b97-3c-ri-d3-def2-mtzvp-int-nosym-charge-1.tm" + TM_CONFIG_ENDING = ".tm" # ending of turbomole configuration files + TM_CONFIG_COSMO = "tm_config_cosmo" # path to the COSMO configuration file + # this needs to be set for each turbomole calculation (current folder) + TM_TURBOTMPDIR = "TURBOTMPDIR" + TM_OUTPUT_COSMOFILE = "mol.cosmo" # hard-coded file name of the turbomole output + TM_OUTPUT_COORDFILE = "coord" + TM_OUTPUT_FINAL_XYZ = "final.xyz" # hard-coded file name for turbomole output + TM_OUTPUT_FINAL_SDF = "final.sdf" + + # the "ridft" binary (+ configuration elements) + # --------- + TM_RIDFT = "ridft" # do DFT calculation with RI-J approximation for + # inter-electronic Coulomb term + TM_RIDFT_FAIL_IDENTIFICATION_STRING = "ridft ended abnormally" + TM_RIDFT_SUCCESS_STRING = ( + "ridft ended normally" # if this string is in stderr, execution was successful + ) + + # the "jobex" binary (+ configuration elements) + # --------- + TM_JOBEX = "jobex" # used for DFT optimization + TM_JOBEX_C = "-c" + TM_JOBEX_GCART = "-gcart" + TM_JOBEX_FAIL_IDENTIFICATION_STRING = "jobex ended abnormally" + TM_JOBEX_SUCCESS_STRING = "jobex ended normally" + + # the "cosmoprep" binary (+ configuration elements) + # --------- + TM_COSMOPREP = "cosmoprep" + TM_COSMOPREP_SUCCESS_STRING = "cosmoprep ended normally" + + # the "define" binary (+ configuration elements) + # --------- + TM_DEFINE = "define" + TM_DEFINE_SUCCESS_STRING = ( + "define ended normally" # if this string is in stderr, execution was successful + ) + + # the "x2t" binary (+ configuration elements) + # --------- + TM_X2T = ( + "x2t" # program to translate an XYZ file to TM input: x2t input.xyz > coord + ) + TM_X2T_SUCCESS_STRING = ( + "$coord" # if this string is in stdout, execution was successful + ) + + # the "t2x" binary (+ configuration elements) + # --------- + # program to translate an TM input to an XYZ file (last snapshot): t2x -c > final.xyz + TM_T2X = "t2x" + TM_T2X_C = "-c" + + # the "cosmotherm" binary (+ configuration elements) + # --------- + CT_COSMOTHERM = "cosmotherm" # the cosmotherm binary + CT_COSMOTHERM_FAIL_STRING = ( + "COSMOtherm ERROR Termination" # if this string is in stderr, the job failed + ) + CT_COSMOTHERM_CONFIG_FILE = ( + "cosmotherm.inp" # hard-coded name of the input file generated before execution + ) + CT_COSMOTHERM_OUTPUT_FILE = ( + "cosmotherm.out" # hard-coded name of the output file generated by cosmotherm + ) + CT_COSMOTHERM_TAB_ENDING = "cosmother.tab" + + CT_CONFIG = "cosmotherm_config" + CT_CONFIG_DEFAULTPATH = "icolos/config/cosmo/default_cosmo.config" + + # control script fields + # --------- + CONTROL_COSMO_OUT = ( + # line, after which insertion is to be put (separate line) + "$cosmo_out file=n" + ) + CONTROL_COSMO_INSERTION = "$cosmo_isorad" + CONTROL_COSMO_REPLACE = ( + "$cosmo_out file=mol.cosmo" # after insertion, replace "$cosmo_out" with this + ) + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class CosmoOutputEnum: + + PATTERN = "pattern" + ELEMENT = "element" + + # general block + # --------- + # this works, because we always set the name of the compound to "mol" + GENERAL_BLOCK_PATTERN_STRING = "--- Compound 1 (mol) ---" + # alternatively, one could search for "Compound 1 ", as we also feed it in in order + + # the key (e.g. 
"E_cosmo") is the tag name for the SDF write-out, "pattern" identifies the line and + # "element" is the number (on the right side, after the ':') of the element (starting with 0) from a + # split using ' ', respectively + GENERAL_BLOCK_ANNOTATIONS = { + "E_cosmo": {"pattern": "E_COSMO+dE", "element": 0}, + "volume": {"pattern": "Volume", "element": 0}, + "area": {"pattern": "Area", "element": 0}, + "dipole": {"pattern": "Dipole moment", "element": 0}, + "HB_acc": {"pattern": "H-bond moment (accept)", "element": 0}, + "HB_don": {"pattern": "H-bond moment (donor)", "element": 0}, + "sigma1": {"pattern": "Sigma moments", "element": 0}, + "sigma2": {"pattern": "Sigma moments", "element": 1}, + "sigma3": {"pattern": "Sigma moments", "element": 2}, + "sigma4": {"pattern": "Sigma moments", "element": 3}, + "sigma5": {"pattern": "Sigma moments", "element": 4}, + "sigma6": {"pattern": "Sigma moments", "element": 5}, + } + + # solvent blocks + # --------- + SOLVENT_BLOCK_PATTERN_STRING = "Gibbs Free Energy of Solvation" + SOLVENT_BLOCK_START_PATTERN = "----------------------" + SOLVENT_BLOCK_BODY_START_PATTERN = "Compound: 1 (mol)" + SOLVENT_TRANSLATE_SOLVENT = { + "h2o": "h2o", + "methanol": "meoh", + "1-octanol": "octanol", + "dimethyls": "dmso", + "cyclohexa": "cychex", + "chcl3": "chcl3", + "acetonitr": "acn", + "thf": "thf", + } + SOLVENT_REPLACEHOLDER = "{solvent}" + SOLVENT_BLOCK_BODY_ANNOTATIONS = { + "Gsolv_{solvent}": {"pattern": "Gibbs Free Energy of Solvation", "element": 0}, + "G_{solvent}": {"pattern": "Free energy of molecule in mix", "element": 0}, + } + SOLVENT_BLOCK_HEADER_COMPOUNDS_PATTERN = "Compound " + SOLVENT_BLOCK_HEADER_MOLFRACTION_PATTERN = "Mole Fraction" + SOLVENT_BLOCK_CURRENT_FRACTION_VALUE = "1.0000" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class FeatureCounterEnum: + + PROPERTY_NUM_RINGS = "num_rings" + PROPERTY_NUM_AROMATIC_RINGS = "num_aromatic_rings" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class CrestEnum: + + # Note: The first argument is usually a coordinate file in TM (coord, Bohr), + # Xmol (*.xyz, Ang.) or SDF format. 
+ # Call: "crest " + # This collection is based on Version 2.10.2, compatible with XTB version 6.1 and later + + # General options (all) + # --------- + CREST = "crest" # binary name + CREST_H = "-h" # print the help message + CREST_HELP_IDENTIFICATION_STRING = "Conformer-Rotamer Ensemble Sampling Tool" + CREST_V3 = "-v3" # version 3 (the default iMTD-GC workflows) + CREST_G = "-g" # 1 string parameter; use GBSA implicit solvent for solvent + CREST_CHRG = "-chrg" # 1 int parameter; the molecule's charge + CREST_UHF = "-uhf" # 1 int parameter; set =Nα-Nβ electrons + CREST_NOZS = ( + "-nozs" # do not perform z-mat sorting, default: z-matrix will be sorted + ) + CREST_ZS = "-zs" # perform z-matrix sorting [default] + # 1 level parameter (vloose, loose, normal, tight, vtight); default: vtight + CREST_OPT = "-opt" + CREST_GFN1 = "-gfn1" # use GFN1-xTB + CREST_GFN2 = "-gfn2" # use GFN2-xTB [default] + CREST_GFF = "-gff" # use GFN-FF (requires xtb 6.3 or newer) + # 1 string parameter; specify name of the xtb binary that should be used + CREST_XNAM = "-xnam" + # 1 float parameter; set energy window in kcl/mol, default: 6.0 kcal/mol + CREST_EWIN = "-ewin" + CREST_RTHR = ( + "-rthr" # 1 float parameter; set RMSD threshold in Ang, default: 0.125 Ang + ) + CREST_ETHR = ( + "-ethr" # 1 float parameter; set E threshold in kcal/mol, default: 0.1 kcal/mol + ) + CREST_BTHR = ( + "-bthr" # 1 float parameter; set Rot. const. threshold, default: 15.0 MHz + ) + # 1 float parameter; Boltzmann population threshold, default: 0.05 (= 5%) + CREST_PTHR = "-pthr" + CREST_EQV = "-eqv" # activate NMR-equivalence printout + CERST_NMR = "-nmr" # activate NMR-mode (= [-eqv] + opt. level: vtight) + CREST_PRSC = "-prsc" # create a scoord.* file for each conformer + CREST_NICEPRINT = "-niceprint" # progress bar printout for optimizations + CREST_DRY = "-dry" # performs a "dry run"; only prints the settings + + # iMTD-GC workflows (selected) + # --------- + CREST_CROSS = "-cross" # do the GC part [default] + CREST_NOCROSS = "-nocross" # don't do the GC part + # 1 int parameter; set SHAKE mode for MD (0=off, 1=H-only, 2=all bonds), default: 2 + CREST_SHAKE = "-shake" + CREST_TSTEP = "-tstep" # 1 int parameter; set MD time step in fs, default: 5 + + # other (selected) + # --------- + CREST_T = ( + # 1 int parameter; set total compound_number of CPUs (threads) to be used + "-T" + ) + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class AutoDockVinaEnum: + # executable "vina" + parameters + # --------- + VINA = "vina" + VINA_CALL = "vina" # the binary call + VINA_HELP = "--help" # display usage summary + VINA_HELP_ADVANCED = "--help_advanced" # display usage summary (with all options) + VINA_VERSION = "--version" # diplay program version + VINA_VERSION_IDENTIFICATION_STRING = ( + "AutoDock Vina 1.1.2" # string, which needs to be present in help output in + ) + # order to assume "AutoDock Vina" can be properly used + VINA_CONFIGURATION = ( + "--config" # path to configuration file, where options below can be put + ) + + # input + VINA_RECEPTOR = "--receptor" # rigid part of the receptor (PDBQT) + VINA_LIGAND = "--ligand" # ligand (PDBQT); only one at a time + VINA_FLEX = "--flex" # flexible side chains, if any (PDBQT) + + # search space + VINA_CENTER_X = "--center_x" # X coordinate of the center + VINA_CENTER_Y = 
"--center_y" # Y coordinate of the center + VINA_CENTER_Z = "--center_z" # Z coordinate of the center + VINA_SIZE_X = "--size_x" # size in the X dimension (Angstroms) + VINA_SIZE_Y = "--size_y" # size in the X dimension (Angstroms) + VINA_SIZE_Z = "--size_z" # size in the X dimension (Angstroms) + + # output + VINA_OUT = "--out" # output models (PDBQT), the default is chosen based on the + # ligand file name + + # advanced options + VINA_SCORE_ONLY = "--score_only" # score only - search space can be omitted + VINA_LOCAL_ONLY = "--local_only" # do local search only + VINA_RANDOMIZE_ONLY = ( + "--randomize_only" # randomize input, attempting to avoid clashes + ) + VINA_WEIGHT_GAUSS1 = "--weight_gauss1" # gauss_1 weight (default: -0.035579) + VINA_WEIGHT_GAUSS2 = "--weight_gauss2" # gauss_2 weight (default: -0.005156) + VINA_WEIGHT_REPULSION = ( + "--weight_repulsion" # repulsion weight (default: 0.84024500000000002) + ) + VINA_WEIGHT_HYDROPHOBIC = ( + "--weight_hydrophobic" # hydrophobic weight (-0.035069000000000003) + ) + VINA_WEIGHT_HYDROGEN = ( + "--weight_hydrogen" # hydrogen bond weight (-0.58743900000000004) + ) + VINA_WEIGHT_ROT = "--weight_rot" # N_rot weight (default: 0.058459999999999998) + + # miscellaneous (optional) + VINA_CPU = "--cpu" # the number of CPUs to use (the default is to try to detect + # the number of CPUs or, failing that, use 1) + VINA_SEED = "--seed" # explicit random seed + VINA_EXHAUSTIVENESS = ( + "--exhaustiveness" # exhaustiveness of the global search (roughly proportional + ) + # to time): 1+ (default: 8) + VINA_NUM_MODES = ( + "--num_modes" # maximum number of binding modes to generate (default: 9) + ) + VINA_ENERGY_RANGE = "--energy_range" # maximum energy difference between the best binding mode and the + # worst one displayed [kcal/mol] (default: 3) + + # --------- + # Vina output specifications + # --------- + ADV_PDBQT = ".pdbqt" + + # the score is part of a tag in the PDBQT -> SDF translated output (tag "REMARK"), which looks like that: + # < REMARK > + # VINA RESULT: -9.1 0.000 0.000 + # Name = /tmp/tmpjssiy8z4.pdb + # ... + + # Note, that the three values are: affinity [kcal/mol] | dist from best mode (rmsd l.b.) | rmsd (u. b.) 
+ REMARK_TAG = "REMARK" + RESULT_LINE_IDENTIFIER = "VINA RESULT" + RESULT_LINE_POS_SCORE = 2 + RESULT_LINE_POS_RMSDTOBEST_LB = 3 + RESULT_LINE_POS_RMSDTOBEST_UB = 4 + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class CrestOutputEnum: + + COORD = "coord" + COORD_ORIGINAL = "coord.original" + CRE_MEMBERS = "cre_members" + CREST_ENERGIES = "crest.energies" + CREST_BEST_XYZ = "crest_best.xyz" + CREST_CONFORMERS_SDF = "crest_conformers.sdf" + CREST_CONFORMERS_XYZ = "crest_conformers.xyz" + CREST_ROTAMERS_XYZ = "crest_rotamers.xyz" + XTBTOPO_MOL2 = "xtbtopo.mol" + + # format properties + PREFIX_ENERGIES_XYZ = " " + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class OpenBabelEnum: + + # executable "obabel" + parameters + # --------- + OBABEL = "obabel" + OBABEL_IDENTIFICATION_STRING = "-O" + OBABEL_INPUTFORMAT_PDBQT = ( + # sets the input format to "PDBQT" (output of "AutoDock Vina") + "-ipdbqt" + ) + OBABEL_INPUTFORMAT_XYZ = ( + "-ixyz" # sets the input format to "XYZ" (format in XTB/TM) + ) + OBABEL_INPUTFORMAT_PDB = "-ipdb" + OBABEL_INPUTFORMAT_SDF = "-isdf" # sets the input format to "SDF" + OBABEL_P = "-p" # sets the value (e.g. "-p 7.4") for protonation + # note, that this overwrites "--addpolarh", which is thus not used + # specifies the output path (directly pasted afterwards, e.g. "-Omypath.pdb") + OBABEL_O = "-O" + OBABEL_OUTPUT_FORMAT_PDBQT = ( + # sets the output format to "PDBQT" (input for "AutoDock Vina") + "-opdbqt" + ) + OBABEL_OUTPUT_FORMAT_SDF = "-osdf" # sets the output format to "SDF" + OBABEL_OUTPUTFORMAT_XYZ = ( + "-oxyz" # sets the output format to "XYZ" (format in XTB/TM) + ) + OBABEL_X = "-x" # specifies generation options + OBABEL_M = "-m" # produce multiple output files + # one of the 'X' options ("-x"), which disables the tree construction of the receptor + # (makes it static), directly pasted together: e.g. 
"-xr" + OBABEL_X_R = "r" + # sets the partial charge generation method (execute "obabel -L charges" to see list of available methods) + OBABEL_PARTIALCHARGE = "--partialcharge" + # one method to compute the partial charges, used as: "--partialcharge gasteiger" + OBABEL_PARTIALCHARGE_GASTEIGER = "gasteiger" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class OpenBabelOutputEnum: + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class OMEGAEnum: + + # executable "oeomega" + parameters; the first parameter is a string indicating the mode + # --------- + OMEGA = "oeomega" + OMEGA_HELP = "--help" # print the help message + OMEGA_HELP_IDENTIFICATION_STRING = "To cite OMEGA please" + OMEGA_MODE_CLASSIC = "classic" # The original customizable omega2 interface + OMEGA_MODE_MACROCYCLE = "macrocycle" # Conformer generation for macrocycles + OMEGA_MODE_ROCS = "rocs" # Optimal conformer generation for ROCS + OMEGA_MODE_POSE = "pose" # Optimal conformer generation for molecular + # alignment and pose prediction by docking + OMEGA_MODE_DENSE = "dense" # Optimal conformer generation for FREEDOM + + # mode "classic" parameters + # --------- + CLASSIC_INPUT = "-in" # Input filename (required, if "-param" not set) + CLASSIC_OUTPUT = "-out" # Output filename (required, if "-param" not set) + CLASSIC_PARAM = "-param" # A parameter file + CLASSIC_PREFIX = "-prefix" # Prefix to use to name output files + CLASSIC_PROGRESS = "-progress" # Method of showing job progress. Either "none", + # "dots", "log" or "percent". + CLASSIC_SDENERGY = "-sdEnergy" # Writes conformer energies to the SD tag field + CLASSIC_VERBOSE = "-verbose" # Triggers copious logging output + # Generate structures from connection-table only. + CLASSIC_FROMCT = "-fromCT" + CLASSIC_EWINDOW = "-ewindow" # Energy window used for conformer selection. + CLASSIC_MAXCONFS = ( + "-maxconfs" # Maximum compound_number of conformations to be saved + ) + CLASSIC_RMS = "-rms" # RMS threshold used to determine duplicate + # conformations + # if set to false ("-canonOrder false"), OMEGA will not update the atom orders + CLASSIC_CANON_ORDER = "-canonOrder" + CLASSIC_STRICTSTEREO = ( + "-strictstereo" # Requires that all chiral atoms and bonds have + ) + # specified stereo + CLASSIC_STRICT = "-strict" # A convenience flag to set "-strictstereo", + # "-strictatomtyping" and "-strictfrags" to true + # or false and override [sic] previous settings. + + # mode "rocs" parameters + # --------- + ROCS_INPUT = "-in" # Input filename (required, if "-param" not set) + ROCS_OUTPUT = "-out" # Output filename (required, if "-param" not set) + ROCS_PARAM = "-param" # A parameter file + ROCS_PREFIX = "-prefix" # Prefix to use to name output files + ROCS_PROGRESS = "-progress" # Method of showing job progress. Either "none", + # "dots", "log" or "percent". 
+ ROCS_VERBOSE = "-verbose" # Triggers copious logging output + + # mode "dense" parameters + # --------- + DENSE_INPUT = "-in" # Input filename (required, if "-param" not set) + DENSE_OUTPUT = "-out" # Output filename (required, if "-param" not set) + DENSE_PARAM = "-param" # A parameter file + DENSE_PREFIX = "-prefix" # Prefix to use to name output files + DENSE_PROGRESS = "-progress" # Method of showing job progress. Either "none", + # "dots", "log" or "percent". + DENSE_VERBOSE = "-verbose" # Triggers copious logging output + + # mode "macrocycle" parameters + # --------- + MACROCYCLE_INPUT = "-in" # Input filename (required, if "-param" not set) + # Output filename (required, if "-param" not set) + MACROCYCLE_OUTPUT = "-out" + MACROCYCLE_PARAM = "-param" # A parameter file + MACROCYCLE_PREFIX = "-prefix" # Prefix to use to name output files + MACROCYCLE_EWINDOW = "-ewindow" # Energy window for the output conformers + MACROCYCLE_ITERATION_CYCLE_SIZE = ( + "-iteration_cycle_size" # Number of iterations to run before checking if a + ) + # new minimum was found (run will finish if no new + # minimum is found). + MACROCYCLE_MAXCONFS = ( + "-maxconfs" # Maximum compound_number of conformations to be saved + ) + MACROCYCLE_MAX_ITERATIONS = ( + # Maximum compound_number of iterations (calculation may + "-max_iterations" + ) + # converge before reaching this compound_number). + MACROCYCLE_REF_TOLERANCE = ( + "-ref_tolerance" # RMS gradient tolerance for force field refinement + ) + MACROCYCLE_RMS = "-rms" # RMS clustering threshold (if 0.0 clustering is + # skipped). + MACROCYCLE_RMSD_DEDUPLICATE = ( + "-rmsd_deduplicate" # Deduplicate using a RMSD calculation (slow) + ) + # rather than energy and torsion comparison + + # mode "pose" parameters + # --------- + POSE_INPUT = "-in" # Input filename (required, if "-param" not set) + POSE_OUTPUT = "-out" # Output filename (required, if "-param" not set) + POSE_PREFIX = "-prefix" # Prefix to use to name output files + POSE_PROGRESS = "-progress" # Method of showing job progress. Either "none", + # "dots", "log" or "percent". + POSE_VERBOSE = "-verbose" # Triggers copious logging output + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class OMEGAOutputEnum: + + # tags + CLASSIC_ENERGY_OUTPUT_TAG = "mmff94smod_NoEstat" + + # other + # This hard-coded output name will be parsed. + OUTPUT_SDF_NAME = "omega_out.sdf" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class XTBEnum: + + # Usage: xtb[options] [options] + # < geometry > may be provided as valid TM coordinate file(*coord in Bohr) or in xmol format(*xyz in Ångström). 
+    # Output Conventions: total energies are given in atomic units (Eh), gaps/HL energies are given in eV
+    XTB = "xtb"
+    XTB_HELP = "--help"
+    XTB_HELP_IDENTIFICATION_STRING = "normal termination of xtb"  # written to stderr
+    XTB_CHRG = "--chrg"  # 1 int parameter; specify molecular charge
+    XTB_UHF = "--uhf"  # 1 int parameter; specify Nalpha-Nbeta
+    # 1 float parameter; accuracy for SCC calculation, lower is better (default = 1.0)
+    XTB_ACC = "--acc"
+    # 1 int parameter; number of iterations in SCC (default = 250)
+    XTB_ITERATION = "--iteration"
+    # 1 level parameter; number of cycles in ANCopt (default = automatic)
+    XTB_CYCLES = "--cycles"
+    XTB_GFN = (
+        # 1 int parameter; specify parametrisation of GFN-xTB (default = 2)
+        "--gfn"
+    )
+    XTB_QMDFF = "--qmdff"  # use QMDFF for single point (needs solvent-file)
+    XTB_TM = "--tm"  # use TURBOMOLE for single point (needs control-file)
+    XTB_ORCA = "--orca"  # use ORCA for single point (writes ORCA input)
+    XTB_MOPAC = "--mopac"  # use MOPAC for single point (writes MOPAC input)
+    # uses periodic boundary conditions (in development)
+    XTB_PERIODIC = "--periodic"
+    # 1 float parameter; electronic temperature (default = 300K)
+    XTB_ETEMP = "--etemp"
+    # 1 level parameter; generalized born (GB) model with solvent accessible surface area (SASA) model
+    XTB_GBSA = "--gbsa"
+    XTB_OPT = "--opt"  # 1 level parameter; either "crude", "sloppy",
+    # "loose", "normal" (default), "tight", "verytight"
+    XTB_P = "-P"  # 1 int parameter; number of cores
+
+    # --vparam FILE      Parameter file for vTB calculation
+    # --xparam FILE      Parameter file for xTB calculation (not used)
+    # --pop              requests printout of Mulliken population analysis
+    # --molden           requests printout of molden file
+    # --dipole           requests dipole printout
+    # --wbo              requests Wiberg bond order printout
+    # --lmo              requests localization of orbitals
+    # --fod              requests FOD calculation, adjusts electronic temperature to 12500 K if possible
+    # --scc, --sp        performs a single point calculation
+    # --vip              performs calculation of ionisation potential
+    # --vea              performs calculation of electron affinity
+    # --vipea            performs calculation of IP and EA
+    # --vomega           performs calculation of electrophilicity index
+    # --vfukui           calculate Fukui indices using GFN-xTB
+    # --esp              calculate electrostatic potential on VdW-grid
+    # --stm              calculate STM image
+    # --grad             performs a gradient calculation
+    # --optts [LEVEL] [ROOT]  call ancopt(3) to perform a transition state optimization, may
+    #                    need to perform a hessian calculation first
+    # --hess             perform a numerical hessian calculation on input geometry
+    # --ohess [LEVEL]    perform a numerical hessian calculation on an ancopt(3) optimized geometry
+    # --md               molecular dynamics simulation on start geometry
+    # --omd              molecular dynamics simulation on ancopt(3) optimized geometry, a loose
+    #                    optimization level will be chosen.
+    # --metadyn [INT]    meta dynamics simulation on start geometry saving INT snapshots to bias the simulation
+    # --siman            conformational search by simulated annealing based on molecular dynamics.
+    #                    Conformers are optimized with ancopt.
+    # --modef INT        modefollowing algorithm. INT specifies the mode that should be used for the modefollowing.
+    # -I,--input FILE    use FILE as input source for xcontrol(7) instructions
+    # --namespace STRING give this xtb(1) run a namespace. All files, even temporary ones, will
+    #                    be named accordingly (might not work everywhere).
+ # --[no]copy copies the xcontrol file at startup (default = true) + # --[no]restart restarts calculation from xtbrestart (default = true) + # -P,--parallel INT compound_number of parallel processes + # --define performs automatic check of input and terminate + # --version print version and terminate + # --citation print citation and terminate + # --license print license and terminate + # -v,--verbose be more verbose (not supported in every unit) + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class XTBOutputEnum: + + XTBOPT_SDF = "xtbopt.sdf" + XTBTOPO_SDF = "xtbtopo.sdf" + XTBOPT_LOG = "xtbopt.log" + XTBRESTART = "xtbrestart" + WBO = "wbo" + CHARGES = "charges" + SUCCESS = "success" + FAILURE = "failure" + + # tags + TOTAL_ENERGY_TAG = "total energy / Eh" + GRADIENT_TAG = "gradient norm / Eh/a0" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class MacromodelEnum: + + MACROMODEL = "macromodel" + MACROMODEL_HELP = "-h" + MACROMODEL_HELP_IDENTIFICATION_STRING = "MacroModel Startup Script" + MACROMODEL_NJOBS = "-NJOBS" + MACROMODEL_WAIT = "-WAIT" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class ModelBuilderEnum: + + # OPTBUILD parameters + OPTBUILD_ENTRY_POINT = "optbuild.py" + CONFIG = "--config" + BEST_BUILDCONFIG_OUTPATH = ( + "--best-buildconfig-outpath" # path to the output JSON for the best trial + ) + BEST_MODEL_OUTPATH = ( + # path to the output model (PKL) for the best trial + "--best-model-outpath" + ) + MERGED_MODEL_OUTPATH = ( + "--merged-model-outpath" # path to the production output model (PKL) + ) + PERSISTENCE_MODE = "--model-persistence-mode" + PERSISTENCE_MODE_PLAINSKLEARN = "plain_sklearn" + PERSISTENCE_MODE_SKLEARNWITHOPTUNAAZ = "sklearn_with_optunaz" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class LigprepEnum: + + LIGPREP = "ligprep" + LIGPREP_HELP = "-h" + LIGPREP_HELP_IDENTIFICATION_STRING = "usage: ligprep [options]" + # SMI input followed by (alternatives: "-icsv", "-imae" and "-isd") + LIGPREP_INPUT_ISMI = "-ismi" + LIGPREP_OUTPUT_OSD = ( + "-osd" # SD(F) output followed by (alternative: "-omae") + ) + # not used in AZdock, but would be an option to feed parameters from configuration file + LIGPREP_INP_CONFIG = "-inp" + LIGPREP_EPIK = ( + "-epik" # Use "Epik" for ionization and tautomerization (Recommended) + ) + LIGPREP_PH = ( + # Effective / target pH; followed by (use 7.0 as default) + "-ph" + ) + # pH tolerance for generated structures; followed by (use 2.0 as default) + LIGPREP_PHT = "-pht" + LIGPREP_AC = ( + "-ac" # Do not respect existing chirality properties and do not respect + ) + # chiralities from the input geometry. 
Generate stereoisomers for all chiral centers up to + # the number permitted (specified using the -s option). This is equivalent to "Generate + # all combinations" in the Ligand Preparation user interface. Default + # behavior is to respect only explicitly indicated chiralities. + # Filter structures via LigFilter using specifications from the file provided. Default: do not filter. + LIGPREP_F = "-f" + LIGPREP_G = ( + # Respect chiralities from input geometry when generating stereoisomers. + "-g" + ) + # Generate up to this stereoisomers per input structure. (Default: 32). + LIGPREP_S = "-s" + # Force-field to be used for the final geometry optimization (either 14 or 16, which refers to OPLS_2005 and + LIGPREP_BFF = "-bff" + # OPLS3e respectively. Default: 14 + LIGPREP_FF_OPLS_2005 = "14" # Default force-field + LIGPREP_FF_OPLS3e = "16" # Alternative force-field + LIGPREP_NJOBS = ( + # Divide the overall job into NJOBS subjobs. Set to 1 by default. + "-NJOBS" + ) + # Divide the overall job into subjobs with no more than NSTRUCTS structures. Set to 1 by default. + LIGPREP_NSTRUCTS = "-NSTRUCTS" + LIGPREP_HOST = ( + # Run the job on remotely on the indicated host entry. + "-HOST" + ) + LIGPREP_HOST_LOCALHOST = "localhost" # Default value for the run. + LIGPREP_WAIT = "-WAIT" # Do not return a prompt until the job completes. + + LIGPREP_LOG_ENDING = ".log" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class GlideEnum: + # executable "glide" + parameters + # note, that you can get the full list of parameters with "$SCHRODINGER/glide -k" + # --------- + GLIDE = "glide" + GLIDE_CALL = "$SCHRODINGER/glide" + GLIDE_HELP = "-h" + GLIDE_HELP_IDENTIFICATION_STRING = "positional arguments:" + GLIDE_WAIT = "-WAIT" + GLIDE_OVERWRITE = "-OVERWRITE" # Remove previous job files before running. + GLIDE_NJOBS = "-NJOBS" # Divide the overall job into NJOBS subjobs. + GLIDE_HOST = "-HOST" # Run job remotely on the indicated host entry. + # WARNING: does not seem to be supported (any longer?) - probably "-NOLOCAL" now? + GLIDE_TMPLAUNCHDIR = "-TMPLAUNCHDIR" + # WARNING: does not seem to be supported (any longer?) + GLIDE_ATTACHED = "-ATTACHED" + # amide bond rotation behavior: "fixed", "free", "penal", "trans", "gen[eralized]" + GLIDE_AMIDE_MODE = "AMIDE_MODE" + # bypass elimination of poses in rough scoring stage (useful for fragment docking) + GLIDE_EXPANDED_SAMPLING = "EXPANDED_SAMPLING" + GLIDE_GRIDFILE = "GRIDFILE" # path to grid (.grd or .zip) file + GLIDE_LIGANDFILE = "LIGANDFILE" # Glide docking ligands file name + # expand size of the Glide funnel by N times to process poses from N confgen runs with minor + # perturbations to the input ligand coordinates + GLIDE_NENHANCED_SAMPLING = "NENHANCED_SAMPLING" + # format for file containing docked poses: "poseviewer" for _pv.mae output; "ligandlib" for + # _lib.mae; similarly "poseviewer_sd" and "ligandlib_sd" for sdf output; "phase_subset" for bypassing + # _lib or _pv in favor of a Phase subset file. 
+ GLIDE_POSE_OUTTYPE = "POSE_OUTTYPE" + GLIDE_POSE_OUTTYPE_LIGANDLIB = ( + "ligandlib_sd" # sets the write-out to SDF (easily parsed) + ) + # uses the poseviewer (MAE format) write-out; contains the receptor + GLIDE_POSE_OUTTYPE_POSEVIEWER = "poseviewer" + GLIDE_POSES_PER_LIG = ( + "POSES_PER_LIG" # maximum number of poses to report per each input ligand + ) + # maximum number of best-by-Emodel poses to submit to post-docking minimization + GLIDE_POSTDOCK_NPOSE = "POSTDOCK_NPOSE" + GLIDE_POSTDOCKSTRAIN = ( + "POSTDOCKSTRAIN" # include strain correction in post-docking score + ) + # glide docking precision ("SP", "XP" or "HTVS") + GLIDE_PRECISION = "PRECISION" + # reward formation of intramolecular hydrogen bonds in the ligand + GLIDE_REWARD_INTRA_HBONDS = "REWARD_INTRA_HBONDS" + GLIDE_USE_CONS = "USE_CONS" + GLIDE_NREQUIRED_CONS = "NREQUIRED_CONS" + # if any of these string is present in the logfile associated with a subjob, all went well + GLIDE_LOG_SUCCESS_STRING = "glide_sort command succeeded" + GLIDE_LOG_FINISHED_STRINGS = {"Exiting Glide"} + GLIDE_LOG_FAIL_STRINGS = { + "*** Error in", + # if any of these strings is present in the logfile associated with a subjob, there was an + # issue resulting in the complete failure of the execution + "Glide cannot recover from this signal and will now abort.", + "======= Backtrace: =========", + } + + # "Glide: FATAL mmlewis error"} + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class SchrodingerExecutablesEnum: + + # executable "licadmin" + parameters + # --------- + LICADMIN = "licadmin" + LICADMIN_STAT = "STAT" # returns the list of tokens used / available + + # executable "sdconvert" + parameters + # --------- + SCHRODINGER_MODULE = "module load schrodinger/2021-2-js-aws" + SDCONVERT = "sdconvert" + SDCONVERT_CALL = "$SCHRODINGER/utilities/sdconvert" + SDCONVERT_HELP = "" + SDCONVERT_HELP_IDENTIFICATION_STRING = "mae : Maestro format" + SDCONVERT_A = "-a" # append structures to the output file + # input; note that the format is directly appended (e.g. "-isd") + SDCONVERT_I = "-i" + SDCONVERT_O = ( + # output; note that the format is directly appended (e.g. "-omae") + "-o" + ) + SDCONVERT_FORMAT_SD = "sd" # MDL SDfile format + SDCONVERT_FORMAT_PDB = "pdb" # PDB file format + SDCONVERT_FORMAT_MM = "mm" # MacroModel (.dat) format + SDCONVERT_FORMAT_MAE = "mae" # Maestro format + SDCONVERT_TITLE = ( + "-title" # define SD property as the source of the Maestro title + ) + SDCONVERT_NOSTEREO = ( + "-nostereo" # do not record the atom parity info from the input file + ) + # do not convert aromatic type 4 bonds to single and double bonds (which is the Maestro convention) + SDCONVERT_NOAROM = "-noarom" + + # executable "structcat" + parameters + STRUCTCAT = "structcat" + STRUCT_SPLIT_CALL = "$SCHRODINGER/run split_structure.py" + STRUCT_SPLIT = "structsplit" + STRUCTCONVERT = "structconvert" + STRUCTCAT_CALL = "$SCHRODINGER/utilities/structcat" + STRUCTCONVERT_CALL = "$SCHRODINGER/utilities/structconvert" + FMP_STATS = "fmp_stats" + FMP_STATS_CALL = "$SCHRODINGER/run -FROM scisol fmp_stats.py" + STRUCTCAT_HELP = "-h" + STRUCTCAT_HELP_IDENTIFICATION_STRING = " must be one of" + # input; note that the format is directly appended (e.g. "-isd") + STRUCTCAT_I = "-i" + STRUCTCAT_O = ( + # output; note that the format is directly appended (e.g. 
"-omae") + "-o" + ) + STRUCTCAT_FORMAT_MAE = "mae" # Maestro format + STRUCTCAT_FORMAT_SD = "sd" # MDL SDfile format + STRUCTCAT_FORMAT_SDF = "sdf" + STRUCTCAT_FORMAT_PDB = "pdb" # PDB format + STRUCTCAT_FORMAT_MOL2 = "mol2" # sybyl (.mol2) format + POSEVIEWER_FILE_KEY = "pv.maegz" + PREPWIZARD = "prepwizard" + PREPWIZARD_CALL = "$SCHRODINGER/utilities/prepwizard" + MULTISIM_EXEC = "$SCHRODINGER/utilities/multisim" + AWS_BINARY_LOC = ( + "ssh /opt/schrodinger/suite/installations/default/" + ) + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class PrimeEnum: + + PRIME_MMGBSA = "prime_mmgbsa" + PRIME_HELP = "-h" + PRIME_HELP_IDENTIFICATION_STRING = ( + "run $SCHRODINGER/prime_mmgbsa -h for a complete listing of all options." + ) + PRIME_NJOBS = "-NJOBS" + PRIME_WAIT = "-WAIT" + + # settings + PRIME_OUTTYPE = "-out_type" + PRIME_OUTTYPE_LIGAND = "LIGAND" + + # tags in output + PRIME_MMGBSA_TOTAL_ENERGY = ( + "r_psp_MMGBSA_dG_Bind" # total energy of binding: complex - receptor - ligand + ) + PRIME_MMGBSA_TOTAL_ENERGY_NS = ( + # as above but without strain energy correction + "r_psp_MMGBSA_dG_Bind(NS)" + ) + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class PantherEnum: + + PANTHER_PTYHON2 = "python2" + PANTHER_ENTRYPOINT = "panther.py" + PANTHER_CONFIG = "panther_config.in" + PANTHER_OUTPUT_FILE = "neg_image.mol2" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class ShaepEnum: + + SHAEP_EXECUTABLE = "shaep" + OUTPUT_SIMILARITY = "similarity.txt" + NEGATIVE_IMAGE_OUTPUT_FILE = "neg_image.mol2" + CONFORMER_PATH = "conformer.sdf" + TAG_SHAPE_SIMILARITY = "shape_similarity" + TAG_ESP_SIMILARITY = "esp_similarity" + + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class GromacsEnum: + + # gmx programs + PDB2GMX = "gmx pdb2gmx" + EDITCONF = "gmx editconf" + SOLVATE = "gmx solvate" + GROMPP = "gmx grompp" + GENION = "gmx genion" + MDRUN = "gmx mdrun" + MAKE_NDX = "gmx make_ndx" + GENRESTR = "gmx genrestr" + TRJCAT = "gmx trjcat" + TRJCONV = "gmx trjconv" + CLUSTER = "gmx cluster" + RMS = "gmx rms" + ANTECHAMBER = "antechamber" + PDB2GMX_FAIL_ID_STRING = "Required option was not provided" + PYTHON = "python3" + ACPYPE_BINARY = "acpype.py" + MMPBSA = "gmx_MMPBSA" + DO_DSSP = "gmx do_dssp" + ACPYPE_PATH = "$ACPYPE" + CLUSTER_TS = "Rscript $MDPLOT/MDplot/inst/bash/MDplot_bash.R clusters_ts" + PRIMARY_COMPONENTS = ["Protein", "DNA", "RNA"] + # from residuetypes.dat + AMBER_PARAMETRISED_COMPONENTS = [ + "ABU", + "ACE", + "AIB", + "ALA", + "ARG", + "ARGN", + "ASN", + "ASN1", + "ASP", + "ASP1", + "ASPH", + "ASPP", + "ASH", + "CT3", + "CYS", + "CYS1", + "CYS2", + "CYSH", + "DALA", + "GLN", + "GLU", + "GLUH", + "GLUP", + "GLH", + "GLY", + "HIS", + "HIS1", + "HISA", + "HISB", + "HISH", + "HISD", + "HISE", + 
"HISP", + "HSD", + "HSE", + "HSP", + "HYP", + "ILE", + "LEU", + "LSN", + "LYS", + "LYSH", + "MELEU", + "MET", + "MEVAL", + "NAC", + "NME", + "NHE", + "NH2", + "PHE", + "PHEH", + "PHEU", + "PHL", + "PRO", + "SER", + "THR", + "TRP", + "TRPH", + "TRPU", + "TYR", + "TYRH", + "TYRU", + "VAL", + "PGLU", + "HID", + "HIE", + "HIP", + "LYP", + "LYN", + "CYN", + "CYM", + "CYX", + "DAB", + "ORN", + "HYP", + "NALA", + "NGLY", + "NSER", + "NTHR", + "NLEU", + "NILE", + "NVAL", + "NASN", + "NGLN", + "NARG", + "NHID", + "NHIE", + "NHIP", + "NHISD", + "NHISE", + "NHISH", + "NTRP", + "NPHE", + "NTYR", + "NGLU", + "NASP", + "NLYS", + "NORN", + "NDAB", + "NLYSN", + "NPRO", + "NHYP", + "NCYS", + "NCYS2", + "NMET", + "NASPH", + "NGLUH", + "CALA", + "CGLY", + "CSER", + "CTHR", + "CLEU", + "CILE", + "CVAL", + "CASN", + "CGLN", + "CARG", + "CHID", + "CHIE", + "CHIP", + "CHISD", + "CHISE", + "CHISH", + "CTRP", + "CPHE", + "CTYR", + "CGLU", + "CASP", + "CLYS", + "CORN", + "CDAB", + "CLYSN", + "CPRO", + "CHYP", + "CCYS", + "CCYS2", + "CMET", + "CASPH", + "CGLUH", + "DA", + "DG", + "DC", + "DT", + "DA5", + "DG5", + "DC5", + "DT5", + "DA3", + "DG3", + "DC3", + "DT3", + "DAN", + "DGN", + "DCN", + "DTN", + "A", + "U", + "C", + "G", + "RA", + "RU", + "RC", + "RG", + "RA5", + "RT5", + "RU5", + "RC5", + "RG5", + "RA3", + "RT3", + "RU3", + "RC3", + "RG3", + "RAN", + "RTN", + "RUN", + "RCN", + "RGN", + "SOL", + "WAT", + "HOH", + "OHH", + "TIP", + "T3P", + "T4P", + "T5P", + "T3H", + "K", + "NA", + "CA", + "MG", + "CL", + "ZN", + "CU1", + "CU", + "LI", + "NA+", + "RB", + "CS", + "F", + "CL-", + "BR", + "I", + "OH", + "Cal", + "IB+", + ] + IONS = ["ZN", "MG", "CU", "CA", "NA", "CL", "RB", "CS", "F", "BR", "I", "OH", "K"] + LIG_ID = "lig_id.lig" + LIG_EXT = "lig" + ATOMS = ["HETATM", "ATOM"] + ATOMTYPES = "[ atomtypes ]" + MOLECULETYPES = "[ moleculetype ]" + MOLECULES = "[ molecules ]\n" + SOLVENTS = ["HOH ", "SOL", "WAT"] + TERMINATIONS = ["ENDMDL", "END"] + + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class PMXEnum: + + # $PMX programs (see respective steps for the help strings) + ABFE = "$PMX abfe" + ANALYSE = "$PMX analyse" + ATOMMAPPING = "$PMX atomMapping" + DOUBLEBOX = "$PMX doublebox" + GENLIB = "$PMX genlib" + GENTOP = "$PMX gentop" + LIGANDHYBRID = "$PMX ligandHybrid" + MUTATE = "$PMX mutate" + + # custom scripts + BOX_WATER_IONS = "box_water_ions.py" + PREPARE_SIMULATIONS = "prepare_simulations.py" + PREPARE_TRANSITIONS = "prepare_transitions.py" + RUN_ANALYSIS = "run_analysis.py" + RUN_SIMULATIONS = "run_simulations.py" + ASSEMBLE_SYSTEMS = "assemble_systems.py" + + ANALYSE_HELP = "-h" + ANALYSE_HELP_SUCCESS_STRING = "Calculates free energies from fast" + + # standard file extensions + PDB = "pdb" + + # file system standards + HYBRID_STR_TOP = "hybridStrTop" + + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class PMXAtomMappingEnum: + + HELP = "--help" # show this help message and exit. <> + I1 = "-i1" # Input ligand structure 1. Default is "lig1.pdb" <1 pdb file> + I2 = "-i2" # Input ligand structure 2. Default is "lig2.pdb" <1 pdb file> + O1 = "-o1" # Output pairs: column1:mol1, column2:mol2. 
Default is "pairs1.dat" <1 dat file> + O2 = "-o2" # Output pairs: column1:mol2, column2:mol1. Default is "pairs2.dat" <1 dat file> + OPDB1 = "-opdb1" # Optional output: superimposed structure 1. <1 pdb file> + OPDB2 = "-opdb2" # Optional output: superimposed structure 2. <1 pdb file> + OPDBM1 = "-opdbm1" # Optional output: morphable atoms in structure 1 <2 pdb files> + OPDBM2 = "-opdbm2" # Optional output: morphable atoms in structure 2. <2 pdb files> + # Optional output: score of the morph. Default is "out_score.dat" <1 dat file> + SCORE = "-score" + N1 = "-n1" # Optional input: index of atoms to consider for mol1 <1 ndx file> + N2 = "-n2" # Optional input: index of atoms to consider for mol2 <1 ndx file> + LOG = "-log" # Output: log file. Default is "mapping.log" <1 log file> + NO_ALIGNMENT = ( + # Should the alignment method be disabled (default enabled) <> + "--no-alignment" + ) + # Should the MCS method be disabled (default enabled) <> + NO_MCS = "--no-mcs" + # Should non-polar hydrogens be discarded from morphing into any other hydrogen (default True) <> + NO_H2H = "--no-H2H" + # Should polar hydrogens be morphed into polar hydrogens (default False) <> + H2HPOLAR = "--H2Hpolar" + H2HEAVY = ( + # Should hydrogen be morphed into a heavy atom (default False) <> + "--H2Heavy" + ) + # Should rings only be used in the MCS search and alignemnt (default False) <> + RINGSONLY = "--RingsOnly" + # Should the distance criterium be also applied in the MCS based search (default False) <> + DMCS = "--dMCS" + # Try swapping the molecule order which would be a cross-check and require double execution time (default False) <> + SWAP = "--swap" + NO_CHIRALITY = ( + # Perform chirality check for MCS mapping (default True) <> + "--no-chirality" + ) + # Distance (nm) between atoms to consider them morphable for alignment approach (default 0.05 nm). <1 numeric value> + D = "--d" + # Maximum time (s) for an MCS search (default 10 s). <1 numeric value> + TIMEOUT = "--timeout" + + LIGAND_DIR = "input/ligands" + + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class PMXLigandHybridEnum: + + HELP = "--help" # Show this help message and exit. <> + I1 = "-i1" # Input ligand structure 1. Default is "lig1.pdb" <1 pdb file> + I2 = "-i2" # Input ligand structure 2. Default is "lig2.pdb" <1 pdb file> + ITP1 = "-itp1" # Input ligand topology 1. Default is "lig1.itp" <1 itp file> + ITP2 = "-itp2" # Input ligand topology 2. Default is "lig2.itp" <1 itp file> + PAIRS = "-pairs" # Optional input: atom pair mapping. <1 dat file> + N1 = "-n1" # Optional input: index of atoms to consider for mol1. <1 ndx file> + N2 = "-n2" # Optional input: index of atoms to consider for mol2. <1 ndx file> + OA = "-oA" # Output: hybrid structure based on the ligand 1. Default is "mergedA.pdb" <1 pdb file> + OB = "-oB" # Output: hybrid structure based on the ligand 2. Default is "mergedB.pdb" <1 pdb file> + OITP = "-oitp" # Output: hybrid topology. Default is "merged.itp". <1 itp file> + # Output: atomtypes for hybrid topology. Default is "ffmerged.itp" <1 itp file> + OFFITP = "-offitp" + LOG = "-log" # Output: log file. Default is "hybrid.log" <1 log file> + # Optional: if -pairs not provided, distance (nm) between atoms to consider them morphable + # for alignment approach (default 0.05 nm). 
<1 numerical value> + D = "--d" + FIT = "--fit" # Fit mol2 onto mol1, only works if pairs.dat is provided. <> + SPLIT = "--split" # Split the topology into separate transitions. + SCDUMM = ( + "--scDUMm" # Scale dummy masses using the counterpart atoms. <1 numeric value> + ) + SCDUMA = "--scDUMa" # Scale bonded dummy angle parameters. <1 numeric value> + SCDUMD = "--scDUMd" # Scale bonded dummy dihedral parameters. <1 numeric value> + DEANG = "--deAng" # Decouple angles composed of 1 dummy and 2 non-dummies. + + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class FepPlusEnum: + FEP_MAPPER = "$SCHRODINGER/run -FROM scisol fep_mapper.py" + FEP_EXECUTOR = "$SCHRODINGER/fep_plus" + FEP_HELP = "-h" + JSC_LIST = 'ssh "export SCHRODINGER=/opt/schrodinger/suite/installations/default && /opt/schrodinger/suite/installations/default/jsc list"' + JSC_TAIL_FILE = 'ssh "export SCHRODINGER=/opt/schrodinger/suite/installations/default && /opt/schrodinger/suite/installations/default/jsc tail-file' + DICT = "dict" + PATH = "path" + FEP_MAPPER_HELP_SUCCESS_STRING = "If given, the match will be allowed" + + +class PdbFixerEnum: + FIXER = "pdbfixer" + + +class DSSPEnum: + MKDSSP = "mkdssp" + + +class SlurmEnum: + SBATCH = "sbatch" + STATE = "State" + COMPLETED = "COMPLETED" + RUNNING = "RUNNING" + PENDING = "PENDING" + FAILED = "FAILED" diff --git a/icolos/utils/enums/step_enums.py b/icolos/utils/enums/step_enums.py new file mode 100644 index 0000000..61ecc44 --- /dev/null +++ b/icolos/utils/enums/step_enums.py @@ -0,0 +1,922 @@ +from enum import Enum + + +class StepBaseEnum(str, Enum): + # general settings + STEPID = "step_id" + + # different step types + STEP_TYPE = "type" + STEP_CREST = "CREST" + STEP_OMEGA = "OMEGA" + STEP_XTB = "XTB" + STEP_MACROMODEL = "MACROMODEL" + STEP_TURBOMOLE = "TURBOMOLE" + STEP_COSMO = "COSMO" + STEP_INITIALIZATION = "INITIALIZATION" + STEP_EMBEDDING = "EMBEDDING" + STEP_PREDICTION = "PREDICTION" + STEP_MODEL_BUILDING = "MODEL_BUILDING" + STEP_BOLTZMANN_WEIGHTING = "BOLTZMANN_WEIGHTING" + STEP_PKA_PREDICTION = "PKA_PREDICTION" + STEP_PRIME = "PRIME" + STEP_CLUSTERING = "CLUSTERING" + STEP_RMSD = "RMSD" + STEP_RMSFILTER = "RMSFILTER" + STEP_DATA_MANIPULATION = "DATA_MANIPULATION" + STEP_DESMOND = "DESMOND" + STEP_DESMOND_SETUP = "DESMOND_SETUP" + STEP_FILTER = "FILTER" + STEP_PANTHER = "PANTHER" + STEP_SHAEP = "SHAEP" + STEP_PDB2GMX = "PDB2GMX" + STEP_PDB2GMX_LIG = "PDB2GMX_LIG" + STEP_EDITCONF = "EDITCONF" + STEP_SOLVATE = "SOLVATE" + STEP_GENION = "GENION" + STEP_GROMPP = "GROMPP" + STEP_MDRUN = "MDRUN" + STEP_FEATURE_COUNTER = "FEATURE_COUNTER" + STEP_TRJCONV = "TRJCONV" + STEP_TRJCAT = "TRJCAT" + STEP_GMX_RMSD = "GMX_RMSD" + STEP_CLUSTER = "CLUSTER" + STEP_DO_DSSP = "DO_DSSP" + STEP_LIGPREP = "LIGPREP" + STEP_GLIDE = "GLIDE" + STEP_AUTODOCKVINA_DOCKING = "VINA_DOCKING" + STEP_AUTODOCKVINA_TARGET_PREPARATION = "VINA_TARGET_PREPARATION" + STEP_FEP_PLUS_SETUP = "FEP_PLUS_SETUP" + STEP_FEP_PLUS_EXEC = "FEP_PLUS_EXEC" + STEP_FEP_PLUS_ANALYSIS = "FEP_PLUS_ANALYSIS" + STEP_PREPWIZARD = "PREPWIZARD" + STEP_MDPOCKET = "MDPOCKET" + STEP_PDB_FIXER = "PDB_FIXER" + STEP_PEPTIDE_EMBEDDER = "PEPTIDE_EMBEDDER" + STEP_GMX_MMPBSA = "GMX_MMPBSA" + + # PMX SCRIPTS + STEP_PMX_ABFE = "PMX_ABFE" + STEP_PMX_ANALYSE = "PMX_ANALYSE" + STEP_PMX_ATOMMAPPING = "PMX_ATOMMAPPING" + STEP_PMX_DOUBLEBOX = "PMX_DOUBLEBOX" + STEP_PMX_GENLIB = 
"PMX_GENLIB" + STEP_PMX_GENTOP = "PMX_GENTOP" + STEP_PMX_LIGANDHYBRID = "PMX_LIGANDHYBRID" + STEP_PMX_MUTATE = "PMX_MUTATE" + STEP_PMX_SETUP = "PMX_SETUP" + STEP_PMX_PREPARE_SIMULATIONS = "PMX_PREPARE_SIMULATIONS" + STEP_PMX_BOX_WATER_IONS = "PMX_BOX_WATER_IONS" + STEP_PMX_PREPARE_TRANSITIONS = "PMX_PREPARE_TRANSITIONS" + STEP_PMX_RUN_SIMULATIONS = "PMX_RUN_SIMULATIONS" + STEP_PMX_ASSEMBLE_SYSTEMS = "PMX_ASSEMBLE_SYSTEMS" + STEP_PMX_RUN_ANALYSIS = "PMX_RUN_ANALYSIS" + + STEP_DSSP = "DSSP" + STEP_TS_CLUSTER = "TS_CLUSTER" + STEP_ESP_SIM = "ESP_SIM" + STEP_JOB_CONTROL = "JOB_CONTROL" + STEP_ACTIVE_LEARNING = "ACTIVE_LEARNING" + + # flow control blocks + STEP_ITERATOR = "ITERATOR" + + # execution + EXEC = "execution" + EXEC_PREFIXEXECUTION = "prefix_execution" + EXEC_BINARYLOCATION = "binary_location" + EXEC_PARALLELIZATION = "parallelization" + EXEC_PARALLELIZATION_CORES = "cores" + EXEC_PARALLELIZATION_MAXLENSUBLIST = "max_length_sublists" + EXEC_FAILUREPOLICY = "failure_policy" + EXEC_FAILUREPOLICY_NTRIES = "n_tries" + EXEC_RESOURCE = "resource" + EXEC_JOB_CONTROL = "job_control" + EXEC_JOB_CONTROL_PARTITION = "partition" + EXEC_JOB_CONTROL_GRES = "gres" + EXEC_JOB_CONTROL_MODULES = "modules" + EXEC_JOB_CONTROL_MEM = "mem" + EXEC_JOB_CONTROL_CORES = "cores" + EXEC_JOB_CONTROL_OTHER_ARGS = "other_args" + + # settings + SETTINGS = "settings" + SETTINGS_ARGUMENTS = "arguments" + SETTINGS_ARGUMENTS_FLAGS = "flags" + SETTINGS_ARGUMENTS_PARAMETERS = "parameters" + SETTINGS_ADDITIONAL = "additional" + + PIPE_INPUT = "pipe_input" + + # annotation: fixed strings + ANNOTATION_TAG_DOCKING_SCORE = "docking_score" + ANNOTATION_TAG_G_SCORE = "g_score" + + ANNOTATION_GRID_ID = "grid_id" + ANNOTATION_GRID_PATH = "grid_path" + ANNOTATION_GRID_FILENAME = "grid_filename" + + GRID_IDS = "grid_ids" # enforces given list of IDs rather than indices + + # I/O and "hand-over" + # --------- + FORMAT_SDF = "SDF" + FORMAT_CSV = "CSV" + FORMAT_SMI = "SMI" + FORMAT_MOL2 = "MOL2" + FORMAT_XTB = "XTB" + FORMAT_PDB = "PDB" + FORMAT_PKL = "PKL" + FORMAT_SMILES = "SMILES" + FORMAT_PLAIN = "PLAIN" + FORMAT_TXT = "TXT" + FORMAT_JSON = "JSON" + FORMAT_DTR = "DTR" + FORMAT_CMS = "CMS" + + INPUT = "input" + INPUT_FIELD = "field" + INPUT_SOURCES = "sources" + INPUT_COMPOUNDS = "compounds" + INPUT_ENUMERATIONS = "enumerations" + INPUT_EXTENSION = "extension" + INPUT_SOURCE = "source" + INPUT_GENERIC = "generic" + INPUT_FORMAT = "format" + INPUT_SOURCE_TYPE = "source_type" + INPUT_SOURCE_TYPE_FILE = "file" + INPUT_SOURCE_TYPE_DIR = "dir" + INPUT_SOURCE_TYPE_PATH = "path" + INPUT_SOURCE_TYPE_STEP = "step" + INPUT_SOURCE_TYPE_STRING = "string" + INPUT_SOURCE_TYPE_URL = "url" + + INPUT_ENFORCE_IDS = "enforce_ids" + INPUT_ENFORCE_COMPOUND_IDS = "compound_ids" + INPUT_ENFORCE_ENUMERATION_IDS = "enumeration_ids" + + INPUT_MERGE = "merge" + INPUT_MERGE_COMPOUNDS = "compounds" + INPUT_MERGE_COMPOUNDS_BY = "merge_compounds_by" + INPUT_MERGE_ENUMERATIONS = "enumerations" + INPUT_MERGE_ENUMERATIONS_BY = "merge_enumerations_by" + INPUT_MERGE_BY_NAME = "name" + INPUT_MERGE_BY_ID = "id" + INPUT_MERGE_BY_SMILE = "smile" + + FILE_TYPE_PDB = "pdb" + FILE_SIZE_THRESHOLD = 2e9 + + # CSV settings + INPUT_CSV_DELIMITER = "delimiter" + INPUT_CSV_COLUMNS = "columns" + INPUT_CSV_SMILES_COLUMN = "smiles" + INPUT_CSV_NAMES_COLUMN = "names" + + # write-out settings + WRITEOUT = "writeout" + WRITEOUT_CONFIG = "config" + + WRITEOUT_COMP = "compounds" + WRITEOUT_COMP_CATEGORY = "category" + WRITEOUT_COMP_CATEGORY_CONFORMERS = "conformers" + 
WRITEOUT_COMP_CATEGORY_ENUMERATIONS = "enumerations"
+ WRITEOUT_COMP_CATEGORY_EXTRADATA = "extra_data"
+ WRITEOUT_COMP_KEY = "key"
+ WRITEOUT_COMP_AGGREGATION = "aggregation"
+ WRITEOUT_COMP_AGGREGATION_MODE = "mode"
+ WRITEOUT_COMP_AGGREGATION_MODE_ALL = "all"
+ WRITEOUT_COMP_AGGREGATION_MODE_BESTPERCOMPOUND = "best_per_compound"
+ WRITEOUT_COMP_AGGREGATION_MODE_BESTPERENUMERATION = "best_per_enumeration"
+ WRITEOUT_COMP_AGGREGATION_HIGHESTISBEST = "highest_is_best"
+ WRITEOUT_COMP_AGGREGATION_KEY = "key"
+ WRITEOUT_COMP_SELECTED_TAGS = "selected_tags"
+ WRITEOUT_COMP_SELECTED_TAGS_KEY = "key"
+ WRITEOUT_COMP_SELECTED_TAGS_HIGHESTISBEST = "highest_is_best"
+
+ WRITEOUT_GENERIC = "generic"
+ WRITEOUT_GENERIC_KEY = "key"
+
+ WRITEOUT_DESTINATION = "destination"
+ WRITEOUT_DESTINATION_RESOURCE = "resource"
+ WRITEOUT_DESTINATION_TYPE = "type"
+ WRITEOUT_DESTINATION_TYPE_FILE = "file"
+ WRITEOUT_DESTINATION_TYPE_REINVENT = "reinvent"
+ WRITEOUT_DESTINATION_TYPE_STDOUT = "stdout"
+ WRITEOUT_DESTINATION_TYPE_STDERR = "stderr"
+ WRITEOUT_DESTINATION_TYPE_REST = "rest"
+ WRITEOUT_DESTINATION_FORMAT = "format"
+ WRITEOUT_DESTINATION_MERGE = "merge"
+ WRITEOUT_DESTINATION_AUTOMATIC = "automatic"
+ WRITEOUT_DESTINATION_BASE_NAME = "base_name"
+ WRITEOUT_DESTINATION_DIR = "dir"
+ WRITEOUT_DESTINATION_MODE = "mode"
+
+ TOKEN_GUARD = "token_guard"
+
+ # try to find the internal value and return
+ # def __getattr__(self, name):
+ # if name in self:
+ # return name
+ # raise AttributeError
+
+ # prohibit any attempt to set any values
+ # def __setattr__(self, key, value):
+ # raise ValueError("No changes allowed.")
+
+
+class IteratorEnum(str, Enum):
+ N_ITERS = "n_iters"
+ ALL = "all"
+ SINGLE = "single"
+
+
+class StepEmbeddingEnum:
+ METHOD = "method"
+ METHOD_RDKIT = "RDKIT"
+
+ EMBED_AS = "embed_as"
+ EMBED_AS_ENUMERATIONS = "enumerations"
+ EMBED_AS_CONFORMERS = "conformers"
+
+ RDKIT_PROTONATE = "protonate"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepClusteringEnum:
+ N_CLUSTERS = "n_clusters"
+ MAX_ITER = "max_iter"
+ TOP_N_PER_SOLVENT = "top_n_per_solvent"
+ FEATURES = "features"
+ FREE_ENERGY_SOLVENT_TAGS = "free_energy_solvent_tags"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepRMSFilterEnum:
+ THRESHOLD = "threshold" # RMS threshold in Angstrom
+
+ # order by this tag in picking the conformers
+ ORDER_BY = "order_by"
+ ORDER_ASCENDING = "ascending"
+
+ METHOD = "method" # calculation method
+ METHOD_BEST = "best" # RDKit's "GetBestRMS"
+ METHOD_ALIGNMOL = "alignmol" # RDKit's "AlignMol"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepRMSDEnum:
+
+ METHOD = "method" # calculation method
+ METHOD_BEST = "best" # RDKit's "GetBestRMS"
+ METHOD_ALIGNMOL = "alignmol" # RDKit's "AlignMol"
+
+ RMSD_TAG = "rmsd"
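+
+ # (clarifying note, not part of the original source) METHOD_BEST corresponds to
+ # rdkit.Chem.rdMolAlign.GetBestRMS (symmetry-aware, slower), METHOD_ALIGNMOL to
+ # rdkit.Chem.rdMolAlign.AlignMol (uses the given atom order, faster)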
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepFeatureCounterEnum:
+
+ FEATURE = "feature"
+ LEVEL = "level"
+ LEVEL_ENUMERATION = "enumeration"
+ LEVEL_CONFORMER = "conformer"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepDataManipulationEnum:
+
+ # specify actions that can be used
+ ACTION = "action"
+ ACTION_NO_ACTION = (
+ # used to skip any calculation (e.g. to just pool input data)
+ "no_action"
+ )
+ CONVERT_MAE_TO_PDB = "mae2pdb"
+ ASSEMBLE_COMPLEXES = "assemble_complexes"
+ ACTION_ATTACH_CONFORMERS_AS_EXTRA = "attach_conformers_as_extra"
+ COLLECT_ITERATOR_RESULTS = "collect_iterator_results"
+ FILTER = "filter"
+
+ # --> For ACTION_ATTACH_CONFORMERS_AS_EXTRA
+ # --- Match everything with the same <compound>:<enumeration>:<conformer> string
+ MATCH_SOURCE = (
+ "source" # step from which the conformers are to be used for matching
+ )
+ KEY_MATCHED = "matched" # extra-data key for matched data
+ RECEPTOR = "receptor"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepBoltzmannWeightingEnum:
+ PROPERTIES = "properties"
+ PROPERTIES_INPUT = "input"
+ PROPERTIES_OUTPUT = "output"
+
+ WEIGHT = "weight"
+ WEIGHT_INPUT = "input"
+ WEIGHT_OUTPUT_PREFIX = "output_prefix"
+ WEIGHT_PROPERTIES = "properties"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepPredictorEnum:
+ MODEL_PATH = "model_path"
+ FEATURES = "features"
+ NAME_PREDICTED = "name_predicted"
+ NAME_PREDICTED_DEFAULT = "pred_value"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepModelBuilderEnum:
+ # configuration fields
+ DATA = "data"
+ DATA_INPUT_COLUMN = "input_column"
+ DATA_RESPONSE_COLUMN = "response_column"
+ DATA_TRAININGSET_FILE = "training_dataset_file"
+ DATA_TESTSET_FILE = "test_dataset_file"
+
+ # fixed tempfile names
+ TMP_INPUT_CONFIG = "input_config.json"
+ TMP_INPUT_DATA = "input_data.csv"
+ TMP_OUTPUT_BEST_MODEL = "best_model.pkl"
+ TMP_OUTPUT_BEST_PARAMETERS = "best_parameters.json"
+ TMP_OUTPUT_PRODUCTION_MODEL = "production_model.pkl"
+
+ # fields
+ FIELD_KEY_PRODUCTION_MODEL = "production_model"
+ FIELD_KEY_BEST_CONFIGURATION = "best_configuration"
+ FIELD_KEY_INPUT_DATA = "input_data"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class TokenGuardEnum:
+ TG = "token_guard"
+ TG_PREFIX_EXECUTION = "prefix_execution"
+ TG_BINARY_LOCATION = "binary_location"
+ TG_TOKEN_POOLS = "token_pools"
+ TG_WAIT_INTERVAL_SECONDS =
"wait_interval_seconds" + TG_WAIT_LIMIT_SECONDS = "wait_limit_seconds" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepPrepwizEnum: + + REMOVE_RES = "remove_res" + LIGANDS = "ligands" + COFACTOR_IDS = [ + "TDP", + "FAD", + "FMN", + "NAD", + "PNS", + "COA", + "PLP", + "GSH", + "BTN", + "FFO", + "B12", + "ASC", + "MQ7", + "UQ1", + "MGD", + "H4B", + "MDO", + "SAM", + "F43", + "COM", + "TP7", + "HEA", + "DPM", + "PQQ", + "TPQ", + "TRQ", + "LPA", + "HEM", + ] + + +class StepLigprepEnum: + FILTER_FILE = "filter_file" + + # the SDF tag with -# (where # is the number of the enumeration starting with '1') + LIGPREP_VARIANTS = "s_lp_Variant" + # number from 0 to 1 (sums up to 1 over all variants) + LIGPREP_TAUTOMER_PROBABILITY = "r_lp_tautomer_probability" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepGlideEnum: + # input specification parameters + CONFIGURATION = "configuration" + TIME_LIMIT_PER_TASK = "time_limit_per_task" + MAESTRO_IN_FILE = "maestro_in_file" + MAESTRO_IN_FILE_PATH = "path" + + # glide: fixed strings + # the docking score (including "Epik" corrections") + GLIDE_DOCKING_SCORE = "r_i_docking_score" + # the "docking score" without "Epik" corrections + GLIDE_GSCORE = "r_i_glide_gscore" + # the index of the ligand in the input file (starting with '1') + GLIDE_SOURCE_FILE_INDEX = "i_m_source_file_index" + + GLIDE_POSEVIEWER_FILE_KEY = "structures_pv.maegz" + GLIDE_MAEGZ_DEFAULT_EXTENSION = "_pv.maegz" + GLIDE_SDF_DEFAULT_EXTENSION = "_lib.sdfgz" + GLIDE_LOG = ".log" + GLIDE_SDF = ".sdf" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepMacromodelEnum: + # COM file + COM_FILE = "com_file" + COM_FILE_PATH = "com_file.com" + COM_FILE_DEFAULT = """ MMOD 0 1 0 0 0.0000 0.0000 0.0000 0.0000 + DEBG 55 0 0 0 0.0000 0.0000 0.0000 0.0000 + FFLD 16 1 0 0 1.0000 0.0000 0.0000 0.0000 + SOLV 3 1 0 0 0.0000 0.0000 0.0000 0.0000 + EXNB 0 0 0 0 0.0000 0.0000 0.0000 0.0000 + BDCO 0 0 0 0 89.4427 99999.0000 0.0000 0.0000 + READ 0 0 0 0 0.0000 0.0000 0.0000 0.0000 + CRMS 0 0 0 0 0.0000 0.8000 0.0000 0.0000 + LMCS 1000 0 0 0 0.0000 0.0000 3.0000 6.0000 + NANT 0 0 0 0 0.0000 0.0000 0.0000 0.0000 + MCNV 1 5 0 0 0.0000 0.0000 0.0000 0.0000 + MCSS 2 0 0 0 27.0000 0.0000 0.0000 0.0000 + MCOP 1 0 0 0 0.5000 0.0000 0.0000 0.0000 + DEMX 0 833 0 0 27.0000 54.0000 0.0000 0.0000 + MSYM 0 0 0 0 0.0000 0.0000 0.0000 0.0000 + AUOP 0 0 0 0 100.0000 0.0000 0.0000 0.0000 + AUTO 0 2 1 1 0.0000 1.0000 0.0000 2.0000 + CONV 2 0 0 0 0.0500 0.0000 0.0000 0.0000 + MINI 1 0 2500 0 0.0000 0.0000 0.0000 0.0000""" + + # fixed file names + MAE_INPUT = "input_mol.mae" + MAE_OUTPUT = "output_mol.mae" + SDF_OUTPUT = "output_mol.sdf" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No 
changes allowed.") + + +class StepPrimeEnum: + + RECEPTOR = "receptor" # path to the receptor MAE file + POSEVIEWER = "poseviewer" + + # fixed file names + SDF_INPUT = "input_mol.sdf" + MAE_INPUT = "input_mol.mae" + MAE_COMPLEX = "complex.mae" + MAE_OUTPUT = "complex-out.maegz" + MMGBSA_SCORE = "r_psp_MMGBSA_dG_Bind" + SDF_OUTPUT = "output_mol.sdf" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepTurbomoleEnum: + EXECUTION_MODE = "execution_mode" + SUCCESS = "success" + FAILED = "failed" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepPantherEnum: + # settings.additional + PANTHER_LOCATION = "panther_location" + PANTHER_CONFIG_FILE = "panther_config_file" + OUTPUT_FILE = "output_file" + PANTHER_CONFIG_DIR = "panther_config_file" + FIELDS = "fields" + + # fields + FIELD_KEY_NEGATIVE_IMAGE = "negative_image" + FIELD_KEY_COORDINATES = "5-Center" + FIELD_KEY_PDB_FILE = "1-Pdb file" + + # parameters + FIELDS_PARAMETERS_LIB = { + "2-Radius": "rad.lib", + "3-Angle": "angles.lib", + "4-Charge": "charges.lib", + } + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepShaepEnum: + # field keys for storing data + FIELD_KEY_NEGATIVE_IMAGE = "negative_image" + NEG_IMAGE_EXT = "mol2" + + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepGromacsEnum: + FIELDS = "fields" + COFACTORS = "cofactors" + FORCEFIELD = "forcefield" + INPUT_FILE = "input_file" + + FIELD_KEY_STRUCTURE = "gro" + FIELD_KEY_PDB = "pdb" + FIELD_KEY_TOPOL = "top" + FIELD_KEY_TPR = "tpr" + FIELD_KEY_MDP = "mdp" + FIELD_KEY_XTC = "xtc" + FIELD_KEY_ITP = "itp" + FIELD_KEY_LOG = "log" + FIELD_KEY_EDR = "edr" + FIELD_KEY_NDX = "ndx" + FILE_SIZE_THRESHOLD = 2000000000 + + MAKE_NDX_COMMAND = "make_ndx_command" + INDEX_FLAG = "-n" + + # magic strings associated with ligand parametrisation step + FORCEFIELD_ITP = "forcefield.itp" + LIGAND_ITP = "Ligand.itp" + INCLUDE_LIG_ITP = '#include "Ligand.itp"' + LIG_MOLECULE_GRP = "Ligand 1\n" + COMPLEX_TOP = "Complex.top" + COMPLEX_PDB = "Complex.pdb" + PROTEIN_PDB = "Protein.pdb" + PROTEIN_TOP = "Protein.top" + LIGAND_PDB = "Ligand.pdb" + LIGAND_MOL2 = "Ligand.mol2" + STD_INDEX = "index.ndx" + STD_TOPOL = "topol.top" + STD_TPR = "structure.tpr" + STD_XTC = "structure.xtc" + STD_STRUCTURE = "structure.gro" + POSRE_LIG = "posre_lig.itp" + FORCE_CONSTANTS = "1000 1000 1000" + LIG_ID = "lig_id" + COUPLING_GROUP = "Other" + MMPBSA_IN = "mmpbsa.in" + GROMACS_LOAD = "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + AMBERTOOLS_LOAD = "module load AmberTools/21-fosscuda-2019a-Python-3.7.2" + WATER_AND_IONS = "Water_and_ions" + PROTEIN_OTHER = "Protein_Other" + SIM_COMPLETE = "Finished mdrun" + AUTO = "auto" + TC_GRPS = "tc-grps" + CLUSTERS_NUMBER = 
"clustersNumber" + LENGTHS = "lengths" + COUPLING_GROUPS = "coupling_groups" + + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepCavExploreEnum: + FIELD_KEY_DTR = "dtr" + FIELD_KEY_CMS = "cms" + FIELD_KEY_DX = "dx" + + # settings.additional + CAVITY_LOCATION = "cavity_location" + CAVITY_CONFIG_FILE = "cavity_config_file" + OUTPUT_FILE = "output_file" + CAVITY_CONFIG_DIR = "cavity_config_dir" + FIELDS = "fields" + SELECTION_TEXT = "selection_text" + PROTEIN = "protein" + NAME_CA = "name CA" + FRAME_LIST_FILE = "list_of_frames.txt" + MDPOCKET_COMMAND = "mdpocket" + MPI_THREADS = "mpi_threads" + EPS = "eps" + MIN_SAMPLES = "min_samples" + ISO_VALUE = "iso_value" + TRAJ_TYPE = "format" + THRESHOLD = "threshold" + + # add own fixed strings and import in step + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepFepPlusEnum: + FIELD_KEY_POSEVIEWER = "poseviewer" + RECEPTOR_MAEGZ = "receptor.maegz" + STRUCT_SPLIT_BASE = "split" + STRUCTCAT_MAEGZ_OUTFILE = "concatenated.mae" + STRUCTCAT_SDF_OUTFILE = "concatenated.sdf" + FEP_MAPPER_OUTPUT = "out" + FMP_OUTPUT_FILE = "out.fmp" + LOGFILE = "multisim.log" + EDGE_HEADER_LINE = "* Edge calculated properties (units in kcal/mol)" + NODE_HEADER_LINE = "* Node calculated properties (units in kcal/mol)" + SIMULATION_PROTOCOL = "* Simulation Protocol" + SIMILARITY = "* Similarity" + DATA_TERMINUS = "fep_mapper_cleanup: Loading output graph" + HOST_FLAG = "-HOST" + WAIT_FLAG = "-WAIT" + JOBNAME_FLAG = "-JOBNAME" + REFERENCE_DG = "ref_dg" + JOBID_STRING = "JobId:" + XRAY_STRUCTURES = "xray_structures" + XRAY_SPLIT = "xray_split" + RETRIES = "-RETRIES" + + FILE_NAME = "--name" + FEP_EXEC_COMPLETE = "Multisim completed." + FEP_EXEC_PARTIAL_COMPLETE = "Multisim partially completed." 
+
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepFilterEnum:
+ FILTER_LEVEL = "filter_level"
+ CONFORMERS = "conformers"
+ COMPOUNDS = "compounds"
+ HIGHEST_IS_BEST = "highest_is_best"
+ ENUMERATIONS = "enumerations"
+ CRITERIA = "criteria"
+ AGGREGATION = "aggregation"
+ RETURN_N = "return_n"
+
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepDesmondEnum:
+
+ PREPROCESS_MSJ = "config.msj"
+ PRODUCTION_MSJ = "production.msj"
+ PRODUCTION_CFG = "prod.cfg"
+ MSJ_FIELDS = "msj_fields"
+ CFG_FIELDS = "cfg_fields"
+ SETUP_MSJ_FIELDS = "setup_msj_fields"
+ CONFIG = "config"
+ TOKEN_STR = "DESMOND_GPGPU:16"
+
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepPdbFixerEnum:
+ ADD_MISSING_HYDROGENS = "add_missing_hydrogens"
+ ADD_MISSING_ATOMS = "add_missing_atoms"
+ FIND_MISSING_ATOMS = "find_missing_atoms"
+ FIND_MISSING_RESIDUES = "find_missing_residues"
+ REPLACE_NONSTANDARD_RESIDUES = "replace_nonstandard_residues"
+ REMOVE_CHAINS = "remove_chains"
+
+
+class StepDSSPEnum:
+ pass
+
+
+class StepCressetEnum:
+ SUCCESS = "success"
+
+
+class StepAutoDockVinaEnum:
+
+ ADV_RECEPTOR_PATH = "receptor_path"
+ ADV_SEED = "seed"
+ ADV_SEARCH_SPACE = "search_space"
+ ADV_SEARCH_SPACE_CENTER_X = "--center_x"
+ ADV_SEARCH_SPACE_CENTER_Y = "--center_y"
+ ADV_SEARCH_SPACE_CENTER_Z = "--center_z"
+ ADV_SEARCH_SPACE_SIZE_X = "--size_x"
+ ADV_SEARCH_SPACE_SIZE_Y = "--size_y"
+ ADV_SEARCH_SPACE_SIZE_Z = "--size_z"
+
+ CONFIGURATION = "configuration"
+ NUMBER_POSES = "number_poses"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepAutoDockVinaTargetPreparationEnum:
+
+ ADV_PDBQT = ".pdbqt"
+ INPUT_RECEPTOR_PDB = "input_receptor_pdb"
+ OUTPUT_RECEPTOR_PDBQT = "output_receptor_pdbqt"
+ PH = "pH"
+ EXTRACT_BOX = "extract_box"
+ EXTRACT_BOX_REFERENCE_LIGAND_PATH = "reference_ligand_path"
+ EXTRACT_BOX_REFERENCE_LIGAND_FORMAT = "reference_ligand_format"
+ EXTRACT_BOX_REFERENCE_LIGAND_FORMAT_PDB = "PDB"
+ EXTRACT_BOX_REFERENCE_LIGAND_FORMAT_SDF = "SDF"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepActiveLearningEnum:
+
+ ORACLE_CONFIG = "oracle_config"
+ SMILES = "SMILES"
+ MOLECULE = "Molecule"
+ VIRTUAL_LIB = "virtual_lib"
+ INIT_SAMPLES = "init_samples"
+ MORGAN_FP = "MorganFP"
+ N_ROUNDS = "n_rounds"
+ BATCH_SIZE = "batch_size"
+ CRITERIA = "criteria"
+ VALIDATION_LIB = "validation_lib"
diff --git a/icolos/utils/enums/step_initialization_enum.py b/icolos/utils/enums/step_initialization_enum.py
new file mode 100644
index 0000000..af53e14
--- /dev/null
+++ b/icolos/utils/enums/step_initialization_enum.py
@@ -0,0 +1,110 @@
+from
icolos.core.job_control.job_control import StepJobControl +from icolos.core.workflow_steps.autodockvina.docking import StepAutoDockVina +from icolos.core.workflow_steps.autodockvina.target_preparation import ( + StepAutoDockVinaTargetPreparation, +) +from icolos.core.workflow_steps.calculation.electrostatics.esp_sim import StepEspSim +from icolos.core.workflow_steps.calculation.feature_counter import StepFeatureCounter +from icolos.core.workflow_steps.gromacs.do_dssp import StepGMXDoDSSP +from icolos.core.workflow_steps.gromacs.mmpbsa import StepGMXmmpbsa +from icolos.core.workflow_steps.cavity_explorer.mdpocket import StepMDpocket +from icolos.core.workflow_steps.gromacs.trajcat import StepGMXTrjcat +from icolos.core.workflow_steps.io.data_manipulation import StepDataManipulation +from icolos.core.workflow_steps.schrodinger.fep_analysis import StepFepPlusAnalysis +from icolos.core.workflow_steps.structure_prediction.pdb_fixer import StepPdbFixer +from icolos.core.workflow_steps.gromacs import * +from icolos.core.workflow_steps.calculation.boltzmann_weighting import ( + StepBoltzmannWeighting, +) +from icolos.core.workflow_steps.calculation.rmsd import StepRMSD +from icolos.core.workflow_steps.schrodinger import * +from icolos.core.workflow_steps.calculation.cosmo import StepCosmo +from icolos.core.workflow_steps.calculation.turbomole import StepTurbomole +from icolos.core.workflow_steps.confgen.crest import StepCREST +from icolos.core.workflow_steps.pmx import * +from icolos.core.workflow_steps.confgen.omega import StepOmega +from icolos.core.workflow_steps.confgen.xtb import StepXTB +from icolos.core.workflow_steps.io.embedder import StepEmbedding +from icolos.core.workflow_steps.io.initialize_compound import StepInitializeCompound +from icolos.core.workflow_steps.prediction.predictor import StepPredictor +from icolos.core.workflow_steps.prediction.model_building import StepModelBuilder +from icolos.core.workflow_steps.calculation.clustering import StepClustering +from icolos.core.workflow_steps.calculation.rms_filter import StepRMSFilter +from icolos.core.workflow_steps.calculation.panther import StepPanther +from icolos.core.workflow_steps.calculation.shaep import StepShaep +from icolos.core.workflow_steps.structure_prediction.peptide_embedder import ( + StepPeptideEmbedder, +) +from icolos.core.workflow_steps.structure_prediction.dssp import StepDSSP +from icolos.utils.enums.step_enums import StepBaseEnum + + +_SBE = StepBaseEnum + + +class StepInitializationEnum: + + STEP_INIT_DICT = { + _SBE.STEP_CREST: StepCREST, + _SBE.STEP_OMEGA: StepOmega, + _SBE.STEP_XTB: StepXTB, + _SBE.STEP_MACROMODEL: StepMacromodel, + _SBE.STEP_TURBOMOLE: StepTurbomole, + _SBE.STEP_COSMO: StepCosmo, + _SBE.STEP_INITIALIZATION: StepInitializeCompound, + _SBE.STEP_EMBEDDING: StepEmbedding, + _SBE.STEP_PREDICTION: StepPredictor, + _SBE.STEP_MODEL_BUILDING: StepModelBuilder, + _SBE.STEP_BOLTZMANN_WEIGHTING: StepBoltzmannWeighting, + _SBE.STEP_PRIME: StepPrime, + _SBE.STEP_DESMOND: StepDesmondExec, + _SBE.STEP_DESMOND_SETUP: StepDesmondSetup, + _SBE.STEP_CLUSTERING: StepClustering, + _SBE.STEP_RMSFILTER: StepRMSFilter, + _SBE.STEP_PANTHER: StepPanther, + _SBE.STEP_SHAEP: StepShaep, + _SBE.STEP_PDB2GMX: StepGMXPdb2gmx, + _SBE.STEP_EDITCONF: StepGMXEditConf, + _SBE.STEP_SOLVATE: StepGMXSolvate, + _SBE.STEP_GENION: StepGMXGenion, + _SBE.STEP_GROMPP: StepGMXGrompp, + _SBE.STEP_MDRUN: StepGMXMDrun, + _SBE.STEP_TRJCONV: StepGMXTrjconv, + _SBE.STEP_TRJCAT: StepGMXTrjcat, + _SBE.STEP_CLUSTER: StepGMXCluster, + 
_SBE.STEP_DO_DSSP: StepGMXDoDSSP, + _SBE.STEP_GMX_RMSD: StepGMXrmsd, + _SBE.STEP_LIGPREP: StepLigprep, + _SBE.STEP_GLIDE: StepGlide, + _SBE.STEP_FEP_PLUS_SETUP: StepFepPlusSetup, + _SBE.STEP_FEP_PLUS_EXEC: StepFepPlusExec, + _SBE.STEP_FEP_PLUS_ANALYSIS: StepFepPlusAnalysis, + _SBE.STEP_PREPWIZARD: StepPrepwizard, + _SBE.STEP_MDPOCKET: StepMDpocket, + _SBE.STEP_PEPTIDE_EMBEDDER: StepPeptideEmbedder, + _SBE.STEP_PDB_FIXER: StepPdbFixer, + _SBE.STEP_GMX_MMPBSA: StepGMXmmpbsa, + _SBE.STEP_TS_CLUSTER: StepClusterTS, + _SBE.STEP_DSSP: StepDSSP, + _SBE.STEP_RMSD: StepRMSD, + _SBE.STEP_DATA_MANIPULATION: StepDataManipulation, + _SBE.STEP_PMX_ASSEMBLE_SYSTEMS: StepPMXAssembleSystems, + _SBE.STEP_PMX_ATOMMAPPING: StepPMXatomMapping, + _SBE.STEP_PMX_ABFE: StepPMXabfe, + _SBE.STEP_PMX_DOUBLEBOX: StepPMXdoublebox, + _SBE.STEP_PMX_LIGANDHYBRID: StepPMXligandHybrid, + _SBE.STEP_PMX_BOX_WATER_IONS: StepPMXBoxWaterIons, + _SBE.STEP_PMX_SETUP: StepPMXSetup, + _SBE.STEP_PMX_PREPARE_TRANSITIONS: StepPMXPrepareTransitions, + _SBE.STEP_PMX_PREPARE_SIMULATIONS: StepPMXPrepareSimulations, + _SBE.STEP_PMX_RUN_ANALYSIS: StepPMXRunAnalysis, + _SBE.STEP_PMX_MUTATE: StepPMXmutate, + _SBE.STEP_PMX_GENTOP: StepPMXgentop, + _SBE.STEP_PMX_GENLIB: StepPMXgenlib, + _SBE.STEP_FEATURE_COUNTER: StepFeatureCounter, + _SBE.STEP_AUTODOCKVINA_DOCKING: StepAutoDockVina, + _SBE.STEP_AUTODOCKVINA_TARGET_PREPARATION: StepAutoDockVinaTargetPreparation, + _SBE.STEP_PMX_RUN_SIMULATIONS: StepPMXRunSimulations, + _SBE.STEP_JOB_CONTROL: StepJobControl, + _SBE.STEP_ESP_SIM: StepEspSim, + } diff --git a/icolos/utils/enums/write_out_enums.py b/icolos/utils/enums/write_out_enums.py new file mode 100644 index 0000000..30bd91f --- /dev/null +++ b/icolos/utils/enums/write_out_enums.py @@ -0,0 +1,47 @@ +class WriteOutEnum: + + RDKIT_NAME = "_Name" + INDEX_STRING = "index_string" + COMPOUND_NAME = "compound_name" + + # REINVENT-compatible JSON write-out + JSON_RESULTS = "results" + JSON_NAMES = "names" + JSON_NA = "" + JSON_VALUES = "values" + JSON_VALUES_KEY = "values_key" + + SDF = "sdf" + PDB = "pdb" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class RunVariablesEnum: + + PREFIX = "[" + POSTFIX = "]" + COMPOUND_ID = "compound_id" + ENUMERATION_ID = "enumeration_id" + CONFORMER_ID = "conformer_id" + COMPOUND_NAME = "compound_name" + ENUMERATION_STRING = "enumeration_string" + CONFORMER_STRING = "conformer_string" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") diff --git a/icolos/utils/execute_external/__init__.py b/icolos/utils/execute_external/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/utils/execute_external/autodockvina.py b/icolos/utils/execute_external/autodockvina.py new file mode 100644 index 0000000..0c2c9f8 --- /dev/null +++ b/icolos/utils/execute_external/autodockvina.py @@ -0,0 +1,41 @@ +from icolos.utils.enums.program_parameters import AutoDockVinaEnum +from icolos.utils.execute_external.execute import ExecutorBase + +_EE = AutoDockVinaEnum() + + +class AutoDockVinaExecutor(ExecutorBase): + """For the execution of AutoDock Vina 1.2.0.""" + + def __init__(self, 
prefix_execution=None, binary_location=None):
+ super().__init__(
+ prefix_execution=prefix_execution, binary_location=binary_location
+ )
+
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+ # check whether a proper executable is provided
+ if command not in [_EE.VINA_CALL]:
+ raise ValueError(
+ "Parameter command must be in the internal AutoDock Vina executable list."
+ )
+
+ return super().execute(
+ command=command,
+ arguments=arguments,
+ check=check,
+ location=location,
+ pipe_input=pipe_input,
+ )
+
+ def is_available(self):
+ try:
+ result = self.execute(
+ command=_EE.VINA_CALL, arguments=[_EE.VINA_HELP], check=True
+ )
+ if result.returncode == 0:
+ return True
+ return False
+ except Exception:
+ return False
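+
+ # (illustrative usage sketch, not part of the original file; the module name and
+ # file paths are assumptions)
+ #   executor = AutoDockVinaExecutor(prefix_execution="module load AutoDock_Vina")
+ #   if executor.is_available():
+ #       executor.execute(command=_EE.VINA_CALL,
+ #                        arguments=["--receptor", "receptor.pdbqt", "--ligand", "ligand.pdbqt"],
+ #                        location="/tmp")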
diff --git a/icolos/utils/execute_external/batch_executor.py b/icolos/utils/execute_external/batch_executor.py
new file mode 100644
index 0000000..cee1d0a
--- /dev/null
+++ b/icolos/utils/execute_external/batch_executor.py
@@ -0,0 +1,145 @@
+import os
+from shlex import quote
+from icolos.utils.execute_external.execute import ExecutorBase
+from icolos.utils.enums.program_parameters import SlurmEnum
+import subprocess
+from typing import List
+import time
+from tempfile import mkstemp
+
+_SE = SlurmEnum()
+
+
+class BatchExecutor(ExecutorBase):
+ """For execution of batch jobs using either the Slurm or the SGE scheduler."""
+
+ def __init__(
+ self,
+ cores: int,
+ partition: str,
+ time: str,
+ mem: str,
+ modules: List,
+ other_args: dict,
+ gres: str,
+ prefix_execution=None,
+ binary_location=None,
+ ):
+ super().__init__(
+ prefix_execution=prefix_execution, binary_location=binary_location
+ )
+
+ self.cores = cores
+ self.partition = partition
+ self.time = time
+ self.mem = mem
+ self.modules = modules
+ self.other_args = other_args
+ self.gres = gres
+
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+
+ batch_script = self._construct_slurm_header()
+ command = self._prepare_command(command, arguments, pipe_input)
+ batch_script.append(command)
+ _, tmpfile = mkstemp(dir=location, suffix=".sh")
+ with open(tmpfile, "w") as f:
+ for line in batch_script:
+ f.write(line)
+ f.write("\n")
+
+ sbatch_command = f"sbatch {tmpfile}"
+ # execute the batch script
+ result = super().execute(
+ command=sbatch_command, arguments=[], location=location
+ )
+ job_id = result.stdout.split()[-1]
+ state = self._wait_for_job_completion(job_id=job_id)
+
+ # check the final job state reported by the scheduler
+ if check:
+ if state != _SE.COMPLETED:
+ raise subprocess.SubprocessError(
+ f"Batch job did not complete successfully:\n{sbatch_command}\nState:\n{state}"
+ )
+ return state
+
+ def is_available(self):
+ raise NotImplementedError(
+ "Cannot reliably check whether an arbitrary program executes properly - do not use."
+ )
+
+ def _prepare_command(
+ self, command: str, arguments: List, pipe_input: str = None
+ ) -> str:
+ arguments = [quote(str(arg)) for arg in arguments]
+
+ # allow for piped input to be passed to binaries
+ if pipe_input is not None:
+ # pipe_input = self._parse_pipe_input(pipe_input)
+ command = pipe_input + " | " + command
+
+ # check if the command (binary) is to be found at a specific location (rather than in $PATH)
+ if self._binary_location is not None:
+ command = os.path.join(self._binary_location, command)
+
+ # check if something (e.g. loading a module) needs to be executed before the command itself
+ if self._prefix_execution is not None:
+ command = self._prefix_execution + " && " + command
+
+ # assemble the final command line
+ complete_command = command + " " + " ".join(str(e) for e in arguments)
+ return complete_command.replace("'", "")
+
+ def _wait_for_job_completion(self, job_id):
+ completed = False
+ state = None
+ while not completed:
+ state = self._check_job_status(job_id)
+ if state in [_SE.PENDING, _SE.RUNNING]:
+ time.sleep(5)
+ continue
+ elif state == _SE.COMPLETED:
+ completed = True
+ elif state == _SE.FAILED:
+ completed = True
+
+ return state
+
+ def _check_job_status(self, job_id):
+ """
+ Monitor the status of a previously submitted job and return the result.
+ """
+ command = f"module load slurmtools && jobinfo {job_id}"
+ result = subprocess.run(
+ command,
+ shell=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ universal_newlines=True,
+ )
+
+ state = None
+ for line in result.stdout.split("\n"):
+ if _SE.STATE in line:
+ state = line.split(":")[-1].split()[0]
+ return state
+
+ def _construct_slurm_header(self):
+ header = [
+ "#!/bin/bash",
+ f"#SBATCH -c{self.cores}",
+ f"#SBATCH -p {self.partition}",
+ f"#SBATCH --time={self.time}",
+ f"#SBATCH --mem={self.mem}",
+ ]
+ header.append(f"#SBATCH --gres={self.gres}")
+ for key, value in self.other_args.items():
+ header.append(f"#SBATCH {key}={value}")
+
+ for module in self.modules:
+ header.append(f"module load {module}")
+
+ return header
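+
+ # (illustrative rendering, assumed values; not part of the original file) for
+ # cores=4, partition="core", time="12:00:00", mem="16g", gres="gpu:1" and
+ # modules=["GROMACS/2021"], the submission script assembled above looks roughly like:
+ #   #!/bin/bash
+ #   #SBATCH -c4
+ #   #SBATCH -p core
+ #   #SBATCH --time=12:00:00
+ #   #SBATCH --mem=16g
+ #   #SBATCH --gres=gpu:1
+ #   module load GROMACS/2021
+ #   <command line assembled by _prepare_command>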
diff --git a/icolos/utils/execute_external/cresset_executor.py b/icolos/utils/execute_external/cresset_executor.py
new file mode 100644
index 0000000..533a6f0
--- /dev/null
+++ b/icolos/utils/execute_external/cresset_executor.py
@@ -0,0 +1,40 @@
+from icolos.utils.execute_external.execute import ExecutorBase
+
+
+class CressetExecutor(ExecutorBase):
+ """For the execution of Cresset binaries."""
+
+ def __init__(self, prefix_execution=None, binary_location=None):
+ super().__init__(
+ prefix_execution=prefix_execution, binary_location=binary_location
+ )
+
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+ # check whether a proper executable is provided
+ # if command not in [EE.OMEGA]:
+ # raise ValueError(
+ # "Parameter command must be an dictionary of the internal Omega executable list."
+ # )
+
+ return super().execute(
+ command=command,
+ arguments=arguments,
+ check=check,
+ location=location,
+ pipe_input=pipe_input,
+ )
+
+ def is_available(self):
+ # try:
+ # result = self.execute(
+ # command=EE.OMEGA, arguments=[EE.OMEGA_HELP], check=True
+ # )
+
+ # if EE.OMEGA_HELP_IDENTIFICATION_STRING in result.stderr:
+ # return True
+ # return False
+ # except Exception as e:
+ # return False
+ pass
diff --git a/icolos/utils/execute_external/crest.py b/icolos/utils/execute_external/crest.py
new file mode 100644
index 0000000..bf0901a
--- /dev/null
+++ b/icolos/utils/execute_external/crest.py
@@ -0,0 +1,41 @@
+from icolos.utils.execute_external.execute import ExecutorBase
+from icolos.utils.enums.program_parameters import CrestEnum
+
+
+EE = CrestEnum()
+
+
+class CrestExecutor(ExecutorBase):
+ """For the execution of the "crest" binary."""
+
+ def __init__(self, prefix_execution=None, binary_location=None):
+ super().__init__(
+ prefix_execution=prefix_execution, binary_location=binary_location
+ )
+
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+ # check whether a proper executable is provided
+ if command not in [EE.CREST]:
+ raise ValueError(
+ "Parameter command must be in the internal Crest executable list."
+ )
+
+ return super().execute(
+ command=command,
+ arguments=arguments,
+ check=check,
+ location=location,
+ pipe_input=pipe_input,
+ )
+
+ def is_available(self):
+ try:
+ result = self.execute(command=EE.CREST, arguments=[EE.CREST_H], check=True)
+
+ if EE.CREST_HELP_IDENTIFICATION_STRING in result.stdout:
+ return True
+ return False
+ except Exception:
+ return False
diff --git a/icolos/utils/execute_external/execute.py b/icolos/utils/execute_external/execute.py
new file mode 100644
index 0000000..6398f38
--- /dev/null
+++ b/icolos/utils/execute_external/execute.py
@@ -0,0 +1,101 @@
+import os
+import abc
+import subprocess
+from shlex import quote
+
+from icolos.utils.enums.execution_enums import ExecutionResourceEnum
+from icolos.utils.enums.program_parameters import SlurmEnum
+
+_ERE = ExecutionResourceEnum
+_SE = SlurmEnum()
+
+
+class ExecutorBase(metaclass=abc.ABCMeta):
+ """Virtual base class for the general and program-specific executors."""
+
+ def __init__(self, prefix_execution=None, binary_location=None):
+ # if something needs to be attached to the execution string each time, store it here; if not, value is "None"
+ self._prefix_execution = prefix_execution
+ self._binary_location = binary_location
+ # initialise from the step with self.execution.resource dict
+
+ @abc.abstractmethod
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+ # to avoid security issues, escape the arguments
+ arguments = [quote(str(arg)) for arg in arguments]
+
+ # allow for piped input to be passed to binaries
+ if pipe_input is not None:
+ # pipe_input = self._parse_pipe_input(pipe_input)
+ command = pipe_input + " | " + command
+
+ # check if the command (binary) is to be found at a specific location (rather than in $PATH)
+ if self._binary_location is not None:
+ command = os.path.join(self._binary_location, command)
+
+ # check if something (e.g. loading a module) needs to be executed before the command itself
+ if self._prefix_execution is not None:
+ command = self._prefix_execution + " && " + command
+
+ # execute; if "location" is set, change to this directory and execute there
+ complete_command = command + " " + " ".join(str(e) for e in arguments)
+ complete_command = complete_command.replace("'", "")
+ # print(complete_command)
+ old_cwd = os.getcwd()
+ if location is not None:
+ os.chdir(location)
+
+ # determine whether this is to be run using local resources or as a batch job
+ try:
+ result = subprocess.run(
+ complete_command,
+ check=False, # use the manual check below to provide better debugging information than subprocess
+ # convert output to string (instead of byte array)
+ universal_newlines=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ shell=True,
+ )
+
+ if check and result.returncode != 0:
+ raise subprocess.SubprocessError(
+ f"Subprocess returned non-zero exit status:\n{complete_command}\nReturn code:\n{result.returncode}\nSTDERR:\n{result.stderr}\nSTDOUT:\n{result.stdout}"
+ )
+ finally:
+ # restore the working directory even if the manual check raised
+ os.chdir(old_cwd)
+ return result
+
+ @abc.abstractmethod
+ def is_available(self):
+ raise NotImplementedError("Overwrite this method in the child class.")
+
+
+class Executor(ExecutorBase):
+ """For execution of command-line programs that do not have any specific executor themselves."""
+
+ def __init__(self, prefix_execution=None, binary_location=None):
+ super().__init__(
+ prefix_execution=prefix_execution,
+ binary_location=binary_location,
+ )
+
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+ return super().execute(
+ command=command,
+ arguments=arguments,
+ check=check,
+ location=location,
+ pipe_input=pipe_input,
+ )
+
+ def is_available(self):
+ raise NotImplementedError(
+ "Cannot reliably check whether an arbitrary program executes properly - do not use."
+ )
+
+
+def execution_successful(output: str, success_str: str) -> bool:
+ return success_str in output
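+
+ # (illustrative, assumed values; not part of the original file) given
+ # prefix_execution="module load xtb", binary_location="/opt/bin", command="xtb" and
+ # arguments=["coord.xyz"], execute() hands the shell the single string:
+ #   module load xtb && /opt/bin/xtb coord.xyz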
diff --git a/icolos/utils/execute_external/fep_plus.py b/icolos/utils/execute_external/fep_plus.py
new file mode 100644
index 0000000..91f5437
--- /dev/null
+++ b/icolos/utils/execute_external/fep_plus.py
@@ -0,0 +1,48 @@
+from icolos.utils.execute_external.execute import ExecutorBase
+from icolos.utils.enums.program_parameters import (
+ FepPlusEnum,
+ SchrodingerExecutablesEnum,
+)
+
+FE = FepPlusEnum()
+SEE = SchrodingerExecutablesEnum()
+
+
+class FepPlusExecutor(ExecutorBase):
+ def __init__(self, prefix_execution=None, binary_location=None):
+ super().__init__(
+ prefix_execution=prefix_execution, binary_location=binary_location
+ )
+
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+ if command not in [
+ FE.FEP_MAPPER,
+ FE.FEP_EXECUTOR,
+ FE.JSC_LIST,
+ FE.JSC_TAIL_FILE,
+ ]:
+ raise ValueError(
+ "Execution command must be recognised by the executable's enum"
+ )
+
+ return super().execute(
+ command=command,
+ arguments=arguments,
+ check=check,
+ location=location,
+ pipe_input=pipe_input,
+ )
+
+ def is_available(self):
+ try:
+ result = self.execute(
+ command=FE.FEP_MAPPER, arguments=[FE.FEP_HELP], check=True
+ )
+ if FE.FEP_MAPPER_HELP_SUCCESS_STRING in result.stdout:
+ return True
+ return False
+ except Exception as e:
+ print(str(e))
+ return False
diff --git a/icolos/utils/execute_external/glide.py b/icolos/utils/execute_external/glide.py
new file mode 100644
index 0000000..5c7f05a
--- /dev/null
+++ b/icolos/utils/execute_external/glide.py
@@ -0,0 +1,44 @@
+from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum, GlideEnum
+from icolos.utils.execute_external.execute import ExecutorBase
+
+SEE = SchrodingerExecutablesEnum()
+EE = GlideEnum()
+
+
+class GlideExecutor(ExecutorBase):
+ """For the execution of the "glide" binary."""
+
+ def __init__(self, prefix_execution=None, binary_location=None):
+ super().__init__(
+ prefix_execution=prefix_execution, binary_location=binary_location
+ )
+
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+ # check whether a proper executable is provided
+ if command not in [EE.GLIDE]:
+ raise ValueError(
+ "Parameter command must be in the internal Glide executable list."
+ )
+
+ # Note: in earlier versions, the call "glide" had to be replaced by "$SCHRODINGER/glide" here.
+ return super().execute(
+ command=command,
+ arguments=arguments,
+ check=check,
+ location=location,
+ pipe_input=pipe_input,
+ )
+
+ def is_available(self):
+ try:
+ result = self.execute(
+ command=EE.GLIDE, arguments=[EE.GLIDE_HELP], check=True
+ )
+
+ if EE.GLIDE_HELP_IDENTIFICATION_STRING in result.stdout:
+ return True
+ return False
+ except Exception:
+ return False
diff --git a/icolos/utils/execute_external/gromacs.py b/icolos/utils/execute_external/gromacs.py
new file mode 100644
index 0000000..72c1d01
--- /dev/null
+++ b/icolos/utils/execute_external/gromacs.py
@@ -0,0 +1,53 @@
+from icolos.utils.enums.program_parameters import GromacsEnum
+from icolos.utils.execute_external.execute import ExecutorBase
+
+_GE = GromacsEnum()
+
+
+class GromacsExecutor(ExecutorBase):
+ def __init__(self, prefix_execution=None, binary_location=None):
+ super().__init__(
+ prefix_execution=prefix_execution, binary_location=binary_location
+ )
+
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+ gmx_commands = [
+ _GE.EDITCONF,
+ _GE.GENION,
+ _GE.GROMPP,
+ _GE.SOLVATE,
+ _GE.MDRUN,
+ _GE.PDB2GMX,
+ _GE.MAKE_NDX,
+ _GE.GENRESTR,
+ _GE.TRJCONV,
+ _GE.TRJCAT,
+ _GE.CLUSTER,
+ _GE.MMPBSA,
+ _GE.DO_DSSP,
+ _GE.RMS,
+ ]
+
+ # substring check: the command string may embed one of these programs (e.g. with a path prefix)
+ if not any(cmd in command for cmd in gmx_commands):
+ raise ValueError(
+ "Command must be present in internal list of GROMACS executables"
+ )
+ # pipe_input provides a handle for programs that expect interactive input
+ return super().execute(
+ command=command,
+ arguments=arguments,
+ check=check,
+ location=location,
+ pipe_input=pipe_input,
+ )
+
+ def is_available(self):
+ try:
+ result = self.execute(command=_GE.PDB2GMX, arguments=[], check=False)
+ if _GE.PDB2GMX_FAIL_ID_STRING in result.stderr:
+ return True
+ return False
+ except Exception:
+ return False
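+
+ # (illustrative usage sketch, not part of the original file; file names and the
+ # module name are assumptions)
+ #   gmx = GromacsExecutor(prefix_execution="module load GROMACS/2021")
+ #   gmx.execute(command=_GE.PDB2GMX,
+ #               arguments=["-f", "protein.pdb", "-water", "tip3p"],
+ #               pipe_input="echo 1")  # answers pdb2gmx's interactive force-field prompt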
_IFE.IFD_HELP_ID in result.stdout: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/license_token_guard.py b/icolos/utils/execute_external/license_token_guard.py new file mode 100644 index 0000000..1cdc1cf --- /dev/null +++ b/icolos/utils/execute_external/license_token_guard.py @@ -0,0 +1,127 @@ +import time +from typing import Dict + +from pydantic import BaseModel, PrivateAttr + +from icolos.utils.execute_external.execute import Executor + +from icolos.loggers.steplogger import StepLogger + +from icolos.utils.enums.logging_enums import LoggingConfigEnum +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum + +_EE = SchrodingerExecutablesEnum() +_LE = LoggingConfigEnum() + + +class TokenGuardParameters(BaseModel): + prefix_execution: str = None + binary_location: str = None + token_pools: Dict + wait_interval_seconds: int = 30 + wait_limit_seconds: int = 0 + + +class SchrodingerLicenseTokenGuard(BaseModel): + """Class that checks whether enough tokens are available to execute Schrodinger binaries.""" + + token_guard: TokenGuardParameters + + class Config: + underscore_attrs_are_private = True + + _logger = PrivateAttr() + _executor = PrivateAttr() + + def __init__(self, **data): + super().__init__(**data) + + self._logger = StepLogger() + + # initialize the executor for all "Schrodinger" related calls and also check if it is available + self._executor = Executor( + prefix_execution=self.token_guard.prefix_execution, + binary_location=self.token_guard.binary_location, + ) + + def _get_token_pool_info(self, licadmin_output: list, token_pool: str) -> dict: + result = {"found": False} + for line in licadmin_output: + if token_pool in line: + parts = line.split(" ") + # a valid token pool status line splits into 16 whitespace-separated fields: + # field 6 holds the total pool size, field 12 the number of tokens currently in use + if len(parts) == 16: + result["total"] = int(parts[6]) + result["available"] = int(parts[6]) - int(parts[12]) + result["found"] = True + break + return result + + def _check_licstat_output(self, licadmin_output: list) -> bool: + all_pools_available = True + for pool_key, pool_token_numbers in self.token_guard.token_pools.items(): + pool_status = self._get_token_pool_info(licadmin_output, pool_key) + if pool_status["found"]: + if pool_status["available"] >= pool_token_numbers: + self._logger.log( + f"Enough tokens available ({pool_status['available']}) to satisfy requirement ({pool_token_numbers} free tokens) for pool {pool_key}.", + _LE.DEBUG, + ) + else: + self._logger.log( + f"Not enough tokens available ({pool_status['available']}) to satisfy requirement ({pool_token_numbers} free tokens) for pool {pool_key}.", + _LE.DEBUG, + ) + all_pools_available = False + else: + all_pools_available = False + self._logger.log( + f"Could not find information on token pool {pool_key}.", _LE.WARNING + ) + return all_pools_available + + def _get_licstat_output(self): + # do not use check=True here: a failing call should be reported by the warning below rather than raise + result = self._executor.execute( + command=_EE.LICADMIN, arguments=[_EE.LICADMIN_STAT], check=False + ) + if result.returncode != 0: + self._logger.log( + "Could not execute the Schrodinger license token guard - do you need to export the licadmin path?", + _LE.WARNING, + ) + return result.stdout.split("\n") + + def guard(self) -> bool: + # loop over the token pools until they are all satisfied or the time limit has run out + counter = 0 + success = False + while True: + if ( + self.token_guard.wait_limit_seconds != 0 + and (counter * self.token_guard.wait_interval_seconds) + >= self.token_guard.wait_limit_seconds + ): + self._logger.log( + f"Wait period ({self.token_guard.wait_limit_seconds}
seconds) set for Schrodinger token guard has been exceeded.", + _LE.ERROR, + ) + break + + # reload the output from "licadmin" + # at this stage, the output from licadmin is a list of strings + licadmin_output = self._get_licstat_output() + + all_pools_available = self._check_licstat_output( + licadmin_output=licadmin_output + ) + if all_pools_available: + self._logger.log( + "All token pool requirements for Schrodinger have been met - proceeding.", + _LE.DEBUG, + ) + success = True + break + else: + time.sleep(self.token_guard.wait_interval_seconds) + counter += 1 + + return success diff --git a/icolos/utils/execute_external/ligprep.py b/icolos/utils/execute_external/ligprep.py new file mode 100644 index 0000000..903d19f --- /dev/null +++ b/icolos/utils/execute_external/ligprep.py @@ -0,0 +1,47 @@ +from icolos.utils.enums.program_parameters import ( + SchrodingerExecutablesEnum, + LigprepEnum, +) +from icolos.utils.execute_external.execute import ExecutorBase + +SEE = SchrodingerExecutablesEnum() +EE = LigprepEnum() + + +class LigprepExecutor(ExecutorBase): + """For the execution of the "ligprep" binary.""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [EE.LIGPREP]: + raise ValueError( + "Parameter command must be an element of the internal Ligprep executable list." + ) + + # Note: it seems that in earlier versions, the call "ligprep" had to be changed to "$SCHRODINGER/ligprep" here. + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute( + command=EE.LIGPREP, arguments=[EE.LIGPREP_HELP], check=True + ) + + if EE.LIGPREP_HELP_IDENTIFICATION_STRING in result.stdout: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/macromodel.py b/icolos/utils/execute_external/macromodel.py new file mode 100644 index 0000000..c3ef932 --- /dev/null +++ b/icolos/utils/execute_external/macromodel.py @@ -0,0 +1,46 @@ +from icolos.utils.enums.program_parameters import ( + MacromodelEnum, + SchrodingerExecutablesEnum, +) +from icolos.utils.execute_external.execute import ExecutorBase + +SEE = SchrodingerExecutablesEnum() +EE = MacromodelEnum() + + +class MacromodelExecutor(ExecutorBase): + """For the execution of the "macromodel" binary.""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [EE.MACROMODEL]: + raise ValueError( + "Parameter command must be an element of the internal Macromodel executable list."
+ ) + + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute( + command=EE.MACROMODEL, arguments=[EE.MACROMODEL_HELP], check=True + ) + + if EE.MACROMODEL_HELP_IDENTIFICATION_STRING in result.stdout: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/omega.py b/icolos/utils/execute_external/omega.py new file mode 100644 index 0000000..3a19b5c --- /dev/null +++ b/icolos/utils/execute_external/omega.py @@ -0,0 +1,42 @@ +from icolos.utils.enums.program_parameters import OMEGAEnum +from icolos.utils.execute_external.execute import ExecutorBase + +EE = OMEGAEnum() + + +class OMEGAExecutor(ExecutorBase): + """For the execution of the "oeomega" binary.""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [EE.OMEGA]: + raise ValueError( + "Parameter command must be an element of the internal Omega executable list." + ) + + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute( + command=EE.OMEGA, arguments=[EE.OMEGA_HELP], check=True + ) + + if EE.OMEGA_HELP_IDENTIFICATION_STRING in result.stderr: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/openbabel.py b/icolos/utils/execute_external/openbabel.py new file mode 100644 index 0000000..80bcab1 --- /dev/null +++ b/icolos/utils/execute_external/openbabel.py @@ -0,0 +1,43 @@ +import os +import sys +from icolos.utils.enums.program_parameters import OpenBabelEnum +from icolos.utils.execute_external.execute import ExecutorBase + +EE = OpenBabelEnum() + + +class OpenBabelExecutor(ExecutorBase): + """For the execution of the "obabel" binary.""" + + def __init__(self):  + # in case the environment is not activated, add the path to the binary here + obabel_location = os.path.dirname(sys.executable) + super().__init__(prefix_execution=None, binary_location=obabel_location) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [EE.OBABEL]: + raise ValueError( + "Parameter command must be an element of the internal OpenBabel executable list."
+ ) + + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + # unfortunately, "obabel" does not return a meaningful return value (it always returns '1'), so instead try to parse + # the "stdout" of the standard message; note that "OpenBabel" is part of the environment and should always work + try: + result = self.execute(command=EE.OBABEL, arguments=[], check=False) + if EE.OBABEL_IDENTIFICATION_STRING in result.stdout: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/pmx.py b/icolos/utils/execute_external/pmx.py new file mode 100644 index 0000000..c2c7fdd --- /dev/null +++ b/icolos/utils/execute_external/pmx.py @@ -0,0 +1,54 @@ +from icolos.utils.enums.program_parameters import PMXEnum +from icolos.utils.execute_external.execute import ExecutorBase + +_PE = PMXEnum() + + +class PMXExecutor(ExecutorBase): + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + if command not in [ + _PE.ABFE, + _PE.ANALYSE, + _PE.ATOMMAPPING, + _PE.DOUBLEBOX, + _PE.GENLIB, + _PE.GENTOP, + _PE.LIGANDHYBRID, + _PE.MUTATE, + _PE.BOX_WATER_IONS, + _PE.PREPARE_SIMULATIONS, + _PE.PREPARE_TRANSITIONS, + _PE.RUN_ANALYSIS, + _PE.RUN_SIMULATIONS, + _PE.ASSEMBLE_SYSTEMS, + ]: + raise ValueError( + "Command must be present in internal list of PMX executables." + ) + + # the pipe_input argument deals with programs that expect interactive input + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute( + command=_PE.ANALYSE, arguments=[_PE.ANALYSE_HELP], check=False + ) + if _PE.ANALYSE_HELP_SUCCESS_STRING in result.stdout: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/prime.py b/icolos/utils/execute_external/prime.py new file mode 100644 index 0000000..3d01075 --- /dev/null +++ b/icolos/utils/execute_external/prime.py @@ -0,0 +1,43 @@ +from icolos.utils.enums.program_parameters import PrimeEnum, SchrodingerExecutablesEnum +from icolos.utils.execute_external.execute import ExecutorBase + +SEE = SchrodingerExecutablesEnum() +EE = PrimeEnum() + + +class PrimeExecutor(ExecutorBase): + """For the execution of the "prime_mmgbsa" binary.""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [EE.PRIME_MMGBSA]: + raise ValueError( + "Parameter command must be an element of the internal Prime executable list."
+ ) + + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute( + command=EE.PRIME_MMGBSA, arguments=[EE.PRIME_HELP], check=True + ) + + if EE.PRIME_HELP_IDENTIFICATION_STRING in result.stdout: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/schrodinger.py b/icolos/utils/execute_external/schrodinger.py new file mode 100644 index 0000000..e4608e4 --- /dev/null +++ b/icolos/utils/execute_external/schrodinger.py @@ -0,0 +1,58 @@ +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum +from icolos.utils.execute_external.execute import ExecutorBase + +_SEE = SchrodingerExecutablesEnum() + + +class SchrodingerExecutor(ExecutorBase): + """For the execution of Schrodinger's support entry points""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided; update the calls to "$SCHRODINGER/XYZ" + if command == _SEE.STRUCTCAT: + command = _SEE.STRUCTCAT_CALL + elif command == _SEE.SDCONVERT: + command = _SEE.SDCONVERT_CALL + elif command == _SEE.STRUCT_SPLIT: + command = _SEE.STRUCT_SPLIT_CALL + elif command == _SEE.STRUCTCONVERT: + command = _SEE.STRUCTCONVERT_CALL + elif command == _SEE.FMP_STATS: + command = _SEE.FMP_STATS_CALL + elif command == _SEE.PREPWIZARD: + command = _SEE.PREPWIZARD_CALL + elif command == _SEE.MULTISIM_EXEC: + command = _SEE.MULTISIM_EXEC + else: + raise ValueError( + "Parameter command must be an element of the internal Schrodinger entry point list." + ) + + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute( + command=_SEE.STRUCTCAT, arguments=[_SEE.STRUCTCAT_HELP], check=True + ) + + if _SEE.STRUCTCAT_HELP_IDENTIFICATION_STRING in result.stdout: + return True + return False + except Exception as e: + print(e) + return False diff --git a/icolos/utils/execute_external/sdconvert.py b/icolos/utils/execute_external/sdconvert.py new file mode 100644 index 0000000..af848f8 --- /dev/null +++ b/icolos/utils/execute_external/sdconvert.py @@ -0,0 +1,46 @@ +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum +from icolos.utils.execute_external.execute import ExecutorBase + +SEE = SchrodingerExecutablesEnum() + + +class SDConvertExecutor(ExecutorBase): + """For the execution of the "sdconvert" binary.""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [SEE.SDCONVERT]: + raise ValueError( + "Parameter command must be an element of the internal sdconvert executable list."
+ ) + + # take care of the special path to "sdconvert" + if command == SEE.SDCONVERT: + command = SEE.SDCONVERT_CALL + + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute( + command=SEE.SDCONVERT, arguments=[SEE.SDCONVERT_HELP], check=False + ) + + if SEE.SDCONVERT_HELP_IDENTIFICATION_STRING in result.stdout: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/structcat.py b/icolos/utils/execute_external/structcat.py new file mode 100644 index 0000000..fced5d0 --- /dev/null +++ b/icolos/utils/execute_external/structcat.py @@ -0,0 +1,46 @@ +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum +from icolos.utils.execute_external.execute import ExecutorBase + +SEE = SchrodingerExecutablesEnum() + + +class StructcatExecutor(ExecutorBase): + """For the execution of the "structcat" binary.""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [SEE.STRUCTCAT]: + raise ValueError( + "Parameter command must be an element of the internal structcat executable list." + ) + + # take care of the special path to "structcat" + if command == SEE.STRUCTCAT: + command = SEE.STRUCTCAT_CALL + + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute( + command=SEE.STRUCTCAT, arguments=[SEE.STRUCTCAT_HELP], check=False + ) + + if SEE.STRUCTCAT_HELP_IDENTIFICATION_STRING in result.stdout: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/turbomole.py b/icolos/utils/execute_external/turbomole.py new file mode 100644 index 0000000..5eaac64 --- /dev/null +++ b/icolos/utils/execute_external/turbomole.py @@ -0,0 +1,62 @@ +import os +import shutil +import tempfile + +from icolos.utils.enums.program_parameters import TurbomoleEnum +from icolos.utils.execute_external.execute import ExecutorBase + +EE = TurbomoleEnum() + + +class TurbomoleExecutor(ExecutorBase): + """For the execution of the "turbomole" binary.""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [ + EE.TM_COSMOPREP, + EE.TM_DEFINE, + EE.TM_RIDFT, + EE.TM_X2T, + EE.TM_T2X, + EE.CT_COSMOTHERM, + EE.TM_JOBEX, + ]: + raise ValueError( + "Parameter command must be an element of the internal Turbomole executable list." + ) + + # TM deposits the temporary files for a run in the folder specified by $TURBOTMPDIR; this setting is + # system-wide, so parallel runs would interfere; also, the folder is not removed automatically + # TODO: find a more elegant solution; is this really necessary for all binaries or only "ridft" and "jobex"?
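+ # as a stop-gap, create a fresh scratch directory for this call and export TURBOTMPDIR to point at it + # (prepended to the actual command), so that concurrent runs cannot collide; the directory is removed + # again once the call has returned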
+ tmp_dir = tempfile.mkdtemp() + command = "".join(["export TURBOTMPDIR=", tmp_dir, " && ", command]) + + result = super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + if tmp_dir is not None and os.path.isdir(tmp_dir): + shutil.rmtree(tmp_dir) + return result + + def is_available(self): + try: + result = self.execute(command=EE.TM_RIDFT, arguments=[], check=True) + + if EE.TM_RIDFT_FAIL_IDENTIFICATION_STRING in result.stderr: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/xtb.py b/icolos/utils/execute_external/xtb.py new file mode 100644 index 0000000..5847cff --- /dev/null +++ b/icolos/utils/execute_external/xtb.py @@ -0,0 +1,40 @@ +from icolos.utils.enums.program_parameters import XTBEnum +from icolos.utils.execute_external.execute import ExecutorBase + +EE = XTBEnum() + + +class XTBExecutor(ExecutorBase): + """For the execution of the "xtb" binary.""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [EE.XTB]: + raise ValueError( + "Parameter command must be an element of the internal XTB executable list." + ) + + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute(command=EE.XTB, arguments=[EE.XTB_HELP], check=True) + + if EE.XTB_HELP_IDENTIFICATION_STRING in result.stderr: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/general/__init__.py b/icolos/utils/general/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/utils/general/arparse_bool_extension.py b/icolos/utils/general/arparse_bool_extension.py new file mode 100644 index 0000000..96cb54d --- /dev/null +++ b/icolos/utils/general/arparse_bool_extension.py @@ -0,0 +1,14 @@ +import argparse + + +def str2bool(inp): + if isinstance(inp, bool): + return inp + if inp.lower() in ("yes", "true", "t", "y", "1"): + return True + elif inp.lower() in ("no", "false", "f", "n", "0"): + return False + else: + raise argparse.ArgumentTypeError( + "Expected castable string or boolean value as input."
+ ) diff --git a/icolos/utils/general/convenience_functions.py b/icolos/utils/general/convenience_functions.py new file mode 100644 index 0000000..8a0fbba --- /dev/null +++ b/icolos/utils/general/convenience_functions.py @@ -0,0 +1,60 @@ +from icolos.utils.general.files_paths import attach_root_path + + +# dictionary convenience functions +# --------- + + +def nested_get(dictionary: dict, keys: list, default=None): + # recursively walk through a nested dict and pull the value corresponding to the specified key(s), + # e.g. nested_get({"a": {"b": 1}}, ["a", "b"]) returns 1 + if not isinstance(keys, list): + keys = [keys] + if dictionary is None: + return default + if not keys: + return dictionary + return nested_get(dictionary.get(keys[0]), keys[1:], default) + + +def in_keys(dictionary: dict, keys: list) -> bool: + if not isinstance(keys, list): + keys = [keys] + + _dict = dictionary + for key in keys: + try: + _dict = _dict[key] + except KeyError: + return False + return True + + +# parsing "setup.py" +# --------- + + +def parse_setuppy(): + path = attach_root_path("setup.py") + parsed_dict = {} + with open(path, "r") as f: + lines = f.readlines() + for line in lines: + if "name" in line: + parsed_dict["name"] = line[line.find('"') + len('"') : line.rfind('"')] + if "version" in line: + parsed_dict["version"] = line[ + line.find('"') + len('"') : line.rfind('"') + ] + if "license" in line: + parsed_dict["license"] = line[ + line.find('"') + len('"') : line.rfind('"') + ] + if "author" in line: + parsed_dict["author"] = line[ + line.find('"') + len('"') : line.rfind('"') + ] + return parsed_dict + + +def ensure_list(inp) -> list: + return inp if isinstance(inp, list) else [inp] diff --git a/icolos/utils/general/files_paths.py b/icolos/utils/general/files_paths.py new file mode 100644 index 0000000..9c808c6 --- /dev/null +++ b/icolos/utils/general/files_paths.py @@ -0,0 +1,105 @@ +import os +import shutil +import time +import json +import tempfile +from typing import Tuple + +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.enums.general_utils_enums import CheckFileGenerationEnum + +_SE = StepBaseEnum +_FG = CheckFileGenerationEnum() + + +def check_file_availability( + path: str, interval_sec: int = 1, maximum_sec: int = 10 +) -> str: + counter = 0 + while not os.path.exists(path): + # wait for an interval + time.sleep(interval_sec) + counter += 1 + + # abort the wait once the maximum time has been exceeded + if maximum_sec is not None and (counter * interval_sec) > maximum_sec: + break + if os.path.exists(path): + if os.path.getsize(path) == 0: + return _FG.GENERATED_EMPTY + else: + return _FG.GENERATED_SUCCESS + else: + return _FG.NOT_GENERATED + + +def remove_folder(folder_path: str): + if os.path.isdir(folder_path): + shutil.rmtree(folder_path) + + +def empty_output_dir(path: str): + for root, subf, files in os.walk(path): + for file in files: + os.remove(os.path.join(root, file)) + + +def move_up_directory(path, n=1): + """Function to move up 'n' directories for a given "path".""" + # add +1 to take the file itself into account + if os.path.isfile(path): + n += 1 + for _ in range(n): + path = os.path.dirname(os.path.abspath(path)) + return path + + +def attach_root_path(path): + """Function to attach the root path of the module for a given "path".""" + ROOT_DIR = move_up_directory(os.path.abspath(__file__), n=3) + return os.path.join(ROOT_DIR, path) + + +def lines_in_file(path): + with open(path) as f: + for i, l in enumerate(f): + pass + return i + 1 + + +def dict_from_json_file(path): + with open(path, "r") as f: + return json.load(f) + + +def
any_in_file(path, strings): + if isinstance(strings, str): + strings = [strings] + if os.path.isfile(path): + with open(path, "r") as f: + file_raw = f.readlines() + for string in strings: + if any(string in line for line in file_raw): + return True + return False + else: + return False + + +def infer_input_type(path: str) -> str: + basename = os.path.basename(path) + ending = basename[-3:].upper() + if ending in [_SE.FORMAT_SDF, _SE.FORMAT_CSV, _SE.FORMAT_SMI]: + return ending + else: + raise ValueError(f"Ending {ending} not supported.") + + +def gen_tmp_file( + suffix: str = None, prefix: str = None, dir: str = None, text: bool = True +) -> Tuple[str, str]: + """Wraps tempfile.mkstemp(), but closes the file handle and returns the base name and full path instead of the handle.""" + # note that in contrast to the underlying "mkstemp" function, "text" is set to True here + fhandle, path = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir, text=text) + os.close(fhandle) + return os.path.basename(path), path diff --git a/icolos/utils/general/icolos_exceptions.py b/icolos/utils/general/icolos_exceptions.py new file mode 100644 index 0000000..c79f0e3 --- /dev/null +++ b/icolos/utils/general/icolos_exceptions.py @@ -0,0 +1,25 @@ +class ExecutionFailed(Exception): + pass + + +class StepFailed(Exception): + pass + + +class ContainerCorrupted(Exception): + pass + + +def get_exception_message(e: Exception): + if e is None: + return None + if hasattr(e, "message"): + return e.message + else: + return e + + +def get_exception_type(e: Exception) -> str: + if e is None: + return None + return type(e).__name__ diff --git a/icolos/utils/general/molecules.py b/icolos/utils/general/molecules.py new file mode 100644 index 0000000..e681273 --- /dev/null +++ b/icolos/utils/general/molecules.py @@ -0,0 +1,20 @@ +from rdkit import Chem +from rdkit.Chem import rdmolops + +from icolos.utils.enums.compound_enums import CompoundTagsEnum + + +def get_charge_for_molecule(molecule: Chem.Mol, add_as_tag=False) -> int: + _MTE = CompoundTagsEnum() + charge = rdmolops.GetFormalCharge(molecule) + if add_as_tag: + molecule.SetProp(_MTE.FORMAL_CHARGE_TAG, str(charge)) + return charge + + +def write_molecule_to_sdf(path: str, molecule: Chem.Mol): + if molecule is None or not isinstance(molecule, Chem.Mol): + raise ValueError("Function requires the input to be an RDKit molecule.") + writer = Chem.SDWriter(path) + writer.write(molecule) + writer.close() diff --git a/icolos/utils/general/notifications.py b/icolos/utils/general/notifications.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/utils/general/parallelization.py b/icolos/utils/general/parallelization.py new file mode 100644 index 0000000..6978a25 --- /dev/null +++ b/icolos/utils/general/parallelization.py @@ -0,0 +1,159 @@ +import math +import multiprocessing +from typing import List, Callable, Dict, Any +from pydantic import BaseModel +from icolos.utils.enums.parallelization import ParallelizationEnum + +_PE = ParallelizationEnum + + +class Subtask(BaseModel): + status: _PE = _PE.STATUS_READY + times_tried: int = 0 + data: Any + + def increment_tries(self): + self.times_tried += 1 + + def set_status(self, status: str): + self.status = status + + def set_status_failed(self): + self.set_status(_PE.STATUS_FAILED) + + def set_status_success(self): + self.set_status(_PE.STATUS_SUCCESS) + + +class SubtaskContainer(BaseModel): + max_tries: int + subtasks: List[Subtask] = [] + + def __init__(self, **data): + super().__init__(**data) + + def clear(self):
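+ # drop all stored subtasks; load_data() relies on this to re-populate the container from scratch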
+ self.subtasks = [] + + def load_data(self, data: List[Any]): + self.clear() + self.add_data(data=data) + + def add_data(self, data: List[Any]): + for data_element in data: + self.subtasks.append( + Subtask(status=_PE.STATUS_READY, times_tried=0, data=data_element) + ) + + def get_todo_tasks(self) -> List[Subtask]: + todo_subtasks = [] + for subtask in self.subtasks: + if ( + subtask.status == _PE.STATUS_READY + or subtask.status == _PE.STATUS_FAILED + ) and subtask.times_tried < self.max_tries: + todo_subtasks.append(subtask) + return todo_subtasks + + def get_done_tasks(self) -> List[Subtask]: + done_subtasks = [] + for subtask in self.subtasks: + if ( + subtask.status == _PE.STATUS_SUCCESS + or subtask.times_tried >= self.max_tries + ): + done_subtasks.append(subtask) + return done_subtasks + + def get_sublists( + self, partitions=None, slice_size=None, get_first_n_lists=None + ) -> List[List[Subtask]]: + if partitions is None and slice_size is None: + raise ValueError("Either specify partitions or slice size.") + + # only get tasks that are not yet completed or have some tries left + subtasks = self.get_todo_tasks() + + # decide on the chunk size, either by doing partitions or by specifying the slice size directly + sublists = [] + if partitions is not None: + chunk_size = int(math.ceil(len(subtasks) / partitions)) + else: + chunk_size = slice_size + + # wrap the tasks in lists as required + for i in range(0, len(subtasks), chunk_size): + sublist = [] + for j in range(i, min(i + chunk_size, len(subtasks))): + sublist.append(subtasks[j]) + sublists.append(sublist) + + if get_first_n_lists is not None and len(sublists) > get_first_n_lists: + return sublists[:get_first_n_lists] + else: + return sublists + + def done(self) -> bool: + for subtask in self.subtasks: + if subtask.status == _PE.STATUS_SUCCESS: + continue + if subtask.status == _PE.STATUS_READY or ( + subtask.status == _PE.STATUS_FAILED + and subtask.times_tried < self.max_tries + ): + return False + return True + + def any_failed(self) -> bool: + return any( + subtask.status == _PE.STATUS_FAILED for subtask in self.subtasks + ) + + def set_max_tries(self, max_tries: int): + self.max_tries = max_tries + + def __len__(self) -> int: + return len(self.subtasks) + + +class Parallelizer(BaseModel): + func: Callable + collect_rtn_codes: bool = False + + def __init__(self, **data): + super().__init__(**data) + + def rearrange_input(self, inp_dict: Dict[str, List]) -> List[Dict]: + return [dict(zip(inp_dict, ele)) for ele in zip(*inp_dict.values())] + + def execute_parallel(self, **kwargs): + # translate the dictionary with the lists of arguments into a list of individual dictionaries + # e.g.
{'one': [1, 2, 3], 'two': ['aaaa', 'bbb', 'cc'], 'three': [0.2, 0.2, 0.1]} ---> + # [{'one': 1, 'two': 'aaaa', 'three': 0.2}, + # {'one': 2, 'two': 'bbb', 'three': 0.2}, + # {'one': 3, 'two': 'cc', 'three': 0.1}] + list_exec = self.rearrange_input(kwargs) + + # run in parallel; wait for all subjobs to finish before proceeding + # optional mechanism for collecting the return codes from the subprocesses via a shared dictionary + if self.collect_rtn_codes: + manager = multiprocessing.Manager() + q = manager.dict() + for subprocess_args in list_exec: + subprocess_args["q"] = q + processes = [] + for subprocess_args in list_exec: + p = multiprocessing.Process(target=self.func, kwargs=subprocess_args) + processes.append(p) + p.start() + for p in processes: + p.join() + + if self.collect_rtn_codes: + return q.values() diff --git a/icolos/utils/general/print_log.py b/icolos/utils/general/print_log.py new file mode 100644 index 0000000..8324b1a --- /dev/null +++ b/icolos/utils/general/print_log.py @@ -0,0 +1,14 @@ +import os +from icolos.loggers.blank_logger import BlankLogger + + +def print_log_file(path: str, logger, level): + logger_blank = BlankLogger() + if os.path.isfile(path): + with open(path, "r") as log_file: + log_file_raw = log_file.readlines() + logger.log(f"Printing log file {path}:\n", level) + for line in log_file_raw: + logger_blank.log(line.rstrip("\n"), level) + logger_blank.log("", level) + logger.log("--- End file", level) diff --git a/icolos/utils/general/progress_bar.py b/icolos/utils/general/progress_bar.py new file mode 100644 index 0000000..1c67ed6 --- /dev/null +++ b/icolos/utils/general/progress_bar.py @@ -0,0 +1,7 @@ +def get_progress_bar_string( + done, total, prefix="", suffix="", decimals=1, length=100, fill="█" +): + percent = ("{0:."
+ str(decimals) + "f}").format(100 * (done / float(total))) + filledLength = int(length * done // total) + bar = fill * filledLength + "-" * (length - filledLength) + return f"{prefix}|{bar}| {percent}% {suffix}" diff --git a/icolos/utils/general/strings.py b/icolos/utils/general/strings.py new file mode 100644 index 0000000..77ae4ca --- /dev/null +++ b/icolos/utils/general/strings.py @@ -0,0 +1,8 @@ +def stringify(obj): + """Converts all objects in a dict to str, recursively.""" + if isinstance(obj, dict): + return {str(key): stringify(value) for key, value in obj.items()} + elif isinstance(obj, list): + return [stringify(value) for value in obj] + else: + return str(obj) diff --git a/icolos/utils/smiles.py b/icolos/utils/smiles.py new file mode 100644 index 0000000..f97d4f3 --- /dev/null +++ b/icolos/utils/smiles.py @@ -0,0 +1,145 @@ +import random + +import rdkit.Chem as rkc +from rdkit import Chem +from rdkit.Chem import AllChem +from rdkit.Chem import SaltRemover +from rdkit.Chem import rdmolops + + +def _initialiseNeutralisationReactions(): + patts = ( + # Imidazoles + ("[n+;H]", "n"), + # Amines + ("[N+;!H0]", "N"), + # Carboxylic acids and alcohols + ("[$([O-]);!$([O-][#7])]", "O"), + # Thiols + ("[S-;X1]", "S"), + # Sulfonamides + ("[$([N-;X2]S(=O)=O)]", "N"), + # Enamines + ("[$([N-;X2][C,N]=C)]", "N"), + # Tetrazoles + ("[n-]", "[nH]"), + # Sulfoxides + ("[$([S-]=O)]", "S"), + # Amides + ("[$([N-]C=O)]", "N"), + ) + return [(Chem.MolFromSmarts(x), Chem.MolFromSmiles(y, False)) for x, y in patts] + + +def _neutralise_charges(mol, reactions=None): + if reactions is None: + reactions = _initialiseNeutralisationReactions() + replaced = False + for i, (reactant, product) in enumerate(reactions): + while mol.HasSubstructMatch(reactant): + replaced = True + rms = AllChem.ReplaceSubstructs(mol, reactant, product) + mol = rms[0] + if replaced: + return mol, True + else: + return mol, False + + +def _get_largest_fragment(mol): + frags = rdmolops.GetMolFrags(mol, asMols=True, sanitizeFrags=True) + maxmol = None + for mol in frags: + if mol is None: + continue + if maxmol is None: + maxmol = mol + if maxmol.GetNumHeavyAtoms() < mol.GetNumHeavyAtoms(): + maxmol = mol + return maxmol + + +_saltremover = SaltRemover.SaltRemover() + + +def _valid_size( + mol, min_heavy_atoms, max_heavy_atoms, element_list, remove_long_side_chains +): + """Filters molecules on number of heavy atoms and atom types""" + mol = _rare_filters(mol) + if mol: + correct_size = min_heavy_atoms < mol.GetNumHeavyAtoms() < max_heavy_atoms + if not correct_size: + return + + valid_elements = all( + [atom.GetAtomicNum() in element_list for atom in mol.GetAtoms()] + ) + if not valid_elements: + return + + has_long_sidechains = False + if remove_long_side_chains: + # remove aliphatic side chains with at least 5 carbons not in a ring + sma = "[CR0]-[CR0]-[CR0]-[CR0]-[CR0]" + has_long_sidechains = mol.HasSubstructMatch(Chem.MolFromSmarts(sma)) + + return correct_size and valid_elements and not has_long_sidechains + + +def _rare_filters(mol): + if mol: + cyano_filter = "[C-]#[N+]" + oh_filter = "[OH+]" + sulfur_filter = "[SH]" + if ( + not mol.HasSubstructMatch(Chem.MolFromSmarts(cyano_filter)) + and not mol.HasSubstructMatch(Chem.MolFromSmarts(oh_filter)) + and not mol.HasSubstructMatch(Chem.MolFromSmarts(sulfur_filter)) + ): + return mol + + +def convert_to_rdkit_smiles(smiles): + return Chem.MolToSmiles( + Chem.MolFromSmiles(smiles, sanitize=False), isomericSmiles=True + ) + + +def randomize_smiles(smiles,
random_type="restricted"): + """ + Returns a random SMILES given a SMILES of a molecule. + :param smiles: The SMILES string to randomize. + :param random_type: The type (unrestricted, restricted) of randomization performed. + :return: A random SMILES string of the same molecule or None if the molecule is invalid. + """ + mol = Chem.MolFromSmiles(smiles) + if not mol: + return None + + if random_type == "unrestricted": + return rkc.MolToSmiles(mol, canonical=False, doRandom=True, isomericSmiles=True) + if random_type == "restricted": + new_atom_order = list(range(mol.GetNumHeavyAtoms())) + random.shuffle(new_atom_order) + random_mol = rkc.RenumberAtoms(mol, newOrder=new_atom_order) + return rkc.MolToSmiles(random_mol, canonical=False, isomericSmiles=True) + raise ValueError("Type '{}' is not valid".format(random_type)) + + +def to_mol(smi): + """ + Creates a Mol object from a SMILES string. + :param smi: SMILES string. + :return: A Mol object or None if it's not valid. + """ + if smi: + return rkc.MolFromSmiles(smi) + + +def to_smiles(mol): + """ + Converts a Mol object into a canonical SMILES string. + :param mol: Mol object. + :return: A SMILES string. + """ + return rkc.MolToSmiles(mol, isomericSmiles=True) diff --git a/icolos_workflow.py b/icolos_workflow.py new file mode 100644 index 0000000..d9ba5bd --- /dev/null +++ b/icolos_workflow.py @@ -0,0 +1,65 @@ +from typing import Dict +from pydantic import BaseModel +import os +import sys + +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.logging_enums import LoggingConfigEnum + +from icolos.utils.entry_point_functions.logging_helper_functions import ( + initialize_logging, +) +from icolos.utils.entry_point_functions.parsing_functions import ( + get_runtime_global_variables, + add_global, +) +from icolos.utils.general.files_paths import attach_root_path +from icolos.utils.enums.composite_agents_enums import WorkflowEnum + +_WE = WorkflowEnum() +_LE = LoggingConfigEnum() + + +class IcolosWorkflow(BaseModel): + """ + Alternative programmatic entrypoint for the Icolos workflow + """ + + config: Dict = None + workflow: WorkFlow = None + logging: str = None + global_vars: Dict = None + + def __init__(self, config, global_vars: Dict = None) -> None: + super().__init__(**config) + + self.config = self._parse_global_vars(config, global_vars) + self.workflow = WorkFlow(**config[_WE.WORKFLOW]) + # the "tutorial" logging setting logs everything to stdout as well as to the log file + self.logging = "tutorial" + + def _initialize_logging(self): + log_conf = attach_root_path(_LE.PATH_CONFIG_TUTORIAL) + logger = initialize_logging(log_conf_path=log_conf, workflow_conf=self.config) + return logger + + def _parse_global_vars(self, config, global_vars): + # substitute global vars throughout the config file, return the modified config + + if global_vars is not None: + config = add_global(config, global_vars, _WE.GLOBAL_VARIABLES) + config = add_global( + config, + get_runtime_global_variables( + os.path.join(os.getcwd(), "config.json"), os.path.realpath(__file__) + ), + _WE.GLOBAL_VARIABLES, + ) + return config + + def execute(self): + self._initialize_logging() + self.workflow.initialize() + self.workflow.execute() + + sys.exit(0) diff --git a/integration_tests.py b/integration_tests.py new file mode 100644 index 0000000..a57f659 --- /dev/null +++ b/integration_tests.py @@ -0,0 +1,5 @@ +from tests.integration_tests import * + + +if __name__ == "__main__": + unittest.main() diff --git a/licences/espsim_licence.txt b/licences/espsim_licence.txt new file mode 100644 index 0000000..a5c8ed3 ---
/dev/null +++ b/licences/espsim_licence.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Esther Heid + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..07de284 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/sdf2smi.py b/sdf2smi.py new file mode 100644 index 0000000..dbefbbc --- /dev/null +++ b/sdf2smi.py @@ -0,0 +1,99 @@ +import os +import pandas as pd +import argparse + +import rdkit.Chem as Chem + +from icolos.core.step_utils.rdkit_utils import to_smiles + + +if __name__ == "__main__": + + # get the input parameters and parse them + parser = argparse.ArgumentParser( + description="Implements a simple translator that takes an SDF file and writes out SMILES." + ) + parser.add_argument("-sdf", type=str, default=None, help="A path to an SDF file.") + parser.add_argument( + "-smi", + type=str, + default=None, + required=False, + help="A path to an output text file.", + ) + parser.add_argument( + "-csv", + type=str, + default=None, + required=False, + help="A path to an output CSV file.", + ) + parser.add_argument( + "-keep_stereo", + action="store_true", + help="If set, exported SMILES contain stereo-information.", + ) + parser.add_argument( + "-tags2columns", + type=str, + nargs="+", + default=None, + required=False, + help="A list of strings for which tags should be transformed into columns.", + ) + args = parser.parse_args() + + if args.sdf is None or not os.path.isfile(args.sdf): + raise Exception( + 'Parameter "-sdf" must be a relative or absolute path to a valid SDF file.' + ) + if args.smi is None and args.csv is None: + raise Exception( + 'At least one of the "-smi" or "-csv" output paths must be set.'
+ ) + + molecules = [] + for mol in Chem.SDMolSupplier(args.sdf): + if mol is None: + continue + molecules.append(mol) + + # write out + # --------- + if args.smi is not None: + with open(args.smi, "w") as smi_file: + for mol in molecules: + smi_file.write(to_smiles(mol, isomericSmiles=args.keep_stereo) + "\n") + + if args.csv is not None: + data_buffer = [] + columns = ["Name", "SMILES"] + tags2columns = [] + if args.tags2columns is not None: + tags2columns = args.tags2columns + columns = columns + tags2columns + for mol in molecules: + # add default columns for this row + row = [ + mol.GetProp("_Name"), + to_smiles(mol, isomericSmiles=args.keep_stereo), + ] + + # add selected columns for this row (if specified) + for tag in tags2columns: + try: + row.append(mol.GetProp(tag)) + except KeyError: + row.append(None) + + data_buffer.append(row) + df_writeout = pd.DataFrame(data_buffer, columns=columns) + df_writeout.to_csv( + path_or_buf=args.csv, + sep=",", + na_rep="", + header=True, + index=False, + mode="w", + quoting=None, + ) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..3a54d9c --- /dev/null +++ b/setup.py @@ -0,0 +1,17 @@ +from setuptools import setup, find_packages + +setup( + name="icolos", + maintainer="Christian Margreitter, Harry Moore", + version="1.4.0", + packages=find_packages("."), + include_package_data=True, + package_dir={"config": "icolos/config"}, + package_data={"icolos": ["config/logging/*.json"]}, + description="Icolos Workflow Manager", + entry_points=""" + [console_scripts] + icolos=icolos.scripts.cli:entry_point + """, + python_requires=">=3.8", +) diff --git a/tests/CREST/__init__.py b/tests/CREST/__init__.py new file mode 100644 index 0000000..adaeac4 --- /dev/null +++ b/tests/CREST/__init__.py @@ -0,0 +1 @@ +from tests.CREST.test_CREST_confgen import * diff --git a/tests/CREST/test_CREST_confgen.py b/tests/CREST/test_CREST_confgen.py new file mode 100644 index 0000000..c8046d5 --- /dev/null +++ b/tests/CREST/test_CREST_confgen.py @@ -0,0 +1,115 @@ +import unittest +import os + +from icolos.core.workflow_steps.confgen.crest import StepCREST + +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.enums.program_parameters import CrestEnum + +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + MAIN_CONFIG, + export_unit_test_env_vars, + get_mol_as_Compound, +) +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_CE = CrestEnum() + + +class Test_CREST_confgen(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/CREST") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + self._paracetamol_molecule = get_mol_as_Compound( + PATHS_EXAMPLEDATA.PARACETAMOL_PATH + ) + self._aspirin_molecule = get_mol_as_Compound(PATHS_EXAMPLEDATA.ASPIRIN_PATH) + + @classmethod + def tearDownClass(cls): + pass + + def test_coordinate_generation_neutral(self): + step_conf = { + _SBE.STEPID: "01_conf_gen_crest", + _SBE.STEP_TYPE: _SBE.STEP_CREST, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: None, + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["CREST_BINARY_LOCATION"], + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 7}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-niceprint"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.CREST_OPT: "normal", + _CE.CREST_G: "h2o", + _CE.CREST_RTHR: 0.5, + _CE.CREST_ETHR: 0.25, + _CE.CREST_EWIN: 8.0, + 
_CE.CREST_PTHR: 0.4, + _CE.CREST_BTHR: 0.02, + }, + } + }, + } + crest_step = StepCREST(**step_conf) + crest_step.data.compounds = [self._paracetamol_molecule] + crest_step.execute() + + # check number of conformers returned (only one Compound with only one Enumeration) + self.assertGreaterEqual(len(crest_step.get_compounds()[0][0]), 18) + + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join(self._test_dir, "CREST_conformers_paracetamol.sdf") + crest_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 35000) + + def test_coordinate_generation_charged(self): + step_conf = { + _SBE.STEPID: "01_conf_gen_crest", + _SBE.STEP_TYPE: _SBE.STEP_CREST, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: None, + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["CREST_BINARY_LOCATION"], + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 7}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-niceprint"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.CREST_OPT: "normal", + _CE.CREST_G: "h2o", + _CE.CREST_RTHR: 0.5, + _CE.CREST_ETHR: 0.25, + _CE.CREST_EWIN: 8.0, + _CE.CREST_PTHR: 0.4, + _CE.CREST_BTHR: 0.02, + }, + } + }, + } + + # check number of conformers returned + crest_step = StepCREST(**step_conf) + crest_step.data.compounds = [self._aspirin_molecule] + crest_step.execute() + + # check number of conformers returned (only one Compound with only one Enumeration) + self.assertGreaterEqual(len(crest_step.get_compounds()[0][0]), 2) + + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join(self._test_dir, "CREST_conformers_aspirin.sdf") + crest_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + print(stat_inf.st_size) + self.assertGreater(stat_inf.st_size, 3200) diff --git a/tests/OMEGA/__init__.py b/tests/OMEGA/__init__.py new file mode 100644 index 0000000..a37a394 --- /dev/null +++ b/tests/OMEGA/__init__.py @@ -0,0 +1 @@ +from tests.OMEGA.test_OMEGA_confgen import Test_OMEGA_confgen diff --git a/tests/OMEGA/test_OMEGA_confgen.py b/tests/OMEGA/test_OMEGA_confgen.py new file mode 100644 index 0000000..3fdaf15 --- /dev/null +++ b/tests/OMEGA/test_OMEGA_confgen.py @@ -0,0 +1,124 @@ +import unittest +import os + +from icolos.core.workflow_steps.confgen.omega import StepOmega + +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.enums.program_parameters import OMEGAEnum + +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + export_unit_test_env_vars, + get_mol_as_Compound, +) +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_CE = OMEGAEnum() + + +class Test_OMEGA_confgen(unittest.TestCase): + @classmethod + def setUpClass(cls): + + cls._test_dir = attach_root_path("tests/junk/OMEGA") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + self._paracetamol_molecule = get_mol_as_Compound( + PATHS_EXAMPLEDATA.PARACETAMOL_PATH + ) + self._aspirin_molecule = get_mol_as_Compound(PATHS_EXAMPLEDATA.ASPIRIN_PATH) + + @classmethod + def tearDownClass(cls): + pass + + def test_coordinate_generation_neutral(self): + step_conf = { + _SBE.STEPID: "01_conf_gen_omega", + _SBE.STEP_TYPE: _SBE.STEP_OMEGA, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load omega"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.CLASSIC_MAXCONFS: 50, + _CE.CLASSIC_RMS: 
0.05, + }, + } + }, + } + omega_step = StepOmega(**step_conf) + omega_step.data.compounds = [self._paracetamol_molecule] + omega_step.execute() + + # check number of conformers returned (only one Compound with only one Enumeration) + self.assertEqual(len(omega_step.get_compounds()[0][0]), 2) + + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join(self._test_dir, "OMEGA_conformers_paracetamol.sdf") + omega_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 4274) + + def test_coordinate_generation_neutral_high_RMS(self): + step_conf = { + _SBE.STEPID: "01_conf_gen_omega", + _SBE.STEP_TYPE: _SBE.STEP_OMEGA, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load omega"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.CLASSIC_MAXCONFS: 10, + _CE.CLASSIC_RMS: 0.7, + }, + } + }, + } + omega_step = StepOmega(**step_conf) + omega_step.data.compounds = [self._paracetamol_molecule] + omega_step.execute() + + # check number of conformers returned (only one Compound with only one Enumeration) + self.assertEqual(len(omega_step.get_compounds()[0][0]), 1) + + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join( + self._test_dir, "OMEGA_conformers_paracetamol_highRMS.sdf" + ) + omega_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 2137) + + def test_coordinate_generation_charged(self): + step_conf = { + _SBE.STEPID: "01_conf_gen_omega", + _SBE.STEP_TYPE: _SBE.STEP_OMEGA, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load omega"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.CLASSIC_MAXCONFS: 10, + _CE.CLASSIC_RMS: 0.0, + }, + } + }, + } + omega_step = StepOmega(**step_conf) + omega_step.data.compounds = [self._aspirin_molecule] + omega_step.execute() + + # check number of conformers returned (only one Compound with only one Enumeration) + self.assertEqual(len(omega_step.get_compounds()[0][0]), 2) + + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join(self._test_dir, "OMEGA_conformers_aspirin.sdf") + omega_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 3480) diff --git a/tests/Turbomole/__init__.py b/tests/Turbomole/__init__.py new file mode 100644 index 0000000..29b5ecd --- /dev/null +++ b/tests/Turbomole/__init__.py @@ -0,0 +1 @@ +from tests.Turbomole.test_Turbomole import * diff --git a/tests/Turbomole/test_Turbomole.py b/tests/Turbomole/test_Turbomole.py new file mode 100644 index 0000000..a71cc90 --- /dev/null +++ b/tests/Turbomole/test_Turbomole.py @@ -0,0 +1,253 @@ +import unittest +import os + +from icolos.core.workflow_steps.calculation.turbomole import StepTurbomole + +from icolos.utils.enums.step_enums import StepBaseEnum, StepTurbomoleEnum +from icolos.utils.enums.program_parameters import TurbomoleEnum + +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + export_unit_test_env_vars, + get_mol_as_Compound, + get_mol_as_Conformer, + MAIN_CONFIG, +) +from icolos.utils.enums.compound_enums import ConformerContainerEnum +from icolos.utils.general.files_paths import attach_root_path +import time + +_SBE = StepBaseEnum +_TE = TurbomoleEnum() +_COE = ConformerContainerEnum() +_STE = StepTurbomoleEnum() + + +class Test_Turbomole(unittest.TestCase): + @classmethod + def 
setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/Turbomole") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + # initialize a Compound with 1 Enumeration and 2 Conformers (done by OMEGA) + _paracetamol_molecule = get_mol_as_Compound(PATHS_EXAMPLEDATA.PARACETAMOL_PATH) + confs = get_mol_as_Conformer(PATHS_EXAMPLEDATA.PARACETAMOL_MULTIPLE_CONF) + _paracetamol_molecule[0].add_conformers(confs, auto_update=True) + self._paracetamol_molecule = _paracetamol_molecule + + @classmethod + def tearDownClass(cls): + pass + + def test_Turbomole_run_ridft_single_core(self): + step_conf = { + _SBE.STEPID: "01_turbomole", + _SBE.STEP_TYPE: _SBE.STEP_TURBOMOLE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load turbomole/73", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _TE.TM_CONFIG_DIR: MAIN_CONFIG["TURBOMOLE_CONFIG"], + _TE.TM_CONFIG_BASENAME: "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", + _TE.TM_CONFIG_COSMO: os.path.join( + MAIN_CONFIG["TURBOMOLE_CONFIG"], "cosmoprep_eps80.tm" + ), + _STE.EXECUTION_MODE: _TE.TM_RIDFT, + }, + }, + } + + os.environ["PARA_ARCH"] = "MPI" + os.environ["PARNODES"] = "4" + tm_step = StepTurbomole(**step_conf) + tm_step.data.compounds = [self._paracetamol_molecule] + + # conformer coordinates should not be touched by the execution + self.assertListEqual( + list( + tm_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8785, 0.6004, -0.2173], + ) + tm_step.execute() + self.assertListEqual( + list( + tm_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8785, 0.6004, -0.2173], + ) + cosmofile = tm_step.get_compounds()[0][0][0].get_extra_data()[ + _COE.EXTRA_DATA_COSMOFILE + ] + coordfile = tm_step.get_compounds()[0][0][0].get_extra_data()[ + _COE.EXTRA_DATA_COORDFILE + ] + self.assertTrue("basgrd points= 9806" in cosmofile[5]) + + # check write-out + out_path = os.path.join(self._test_dir, "paracetamol_conf1_CosmoFile") + with open(out_path, "w") as f: + f.writelines(cosmofile) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 132018) + + out_path = os.path.join(self._test_dir, "paracetamol_conf1_CoordFile") + with open(out_path, "w") as f: + f.writelines(coordfile) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 13544) + + def test_Turbomole_run_ridft_dual_core(self): + step_conf = { + _SBE.STEPID: "01_turbomole", + _SBE.STEP_TYPE: _SBE.STEP_TURBOMOLE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load turbomole/73", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 2}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _TE.TM_CONFIG_DIR: MAIN_CONFIG["TURBOMOLE_CONFIG"], + _TE.TM_CONFIG_BASENAME: "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", + _TE.TM_CONFIG_COSMO: os.path.join( + MAIN_CONFIG["TURBOMOLE_CONFIG"], "cosmoprep_eps80.tm" + ), + _STE.EXECUTION_MODE: _TE.TM_RIDFT, + }, + }, + } + os.environ["PARA_ARCH"] = "MPI" + os.environ["PARNODES"] = "4" + + tm_step = StepTurbomole(**step_conf) + tm_step.data.compounds = [self._paracetamol_molecule] + + # conformer coordinates should not be touched by the execution + self.assertListEqual( + 
list( + tm_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8785, 0.6004, -0.2173], + ) + t1 = time.time() + tm_step.execute() + t2 = time.time() + + self.assertLess(t2 - t1, 50) + self.assertListEqual( + list( + tm_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8785, 0.6004, -0.2173], + ) + cosmofile = tm_step.get_compounds()[0][0][0].get_extra_data()[ + _COE.EXTRA_DATA_COSMOFILE + ] + coordfile = tm_step.get_compounds()[0][0][0].get_extra_data()[ + _COE.EXTRA_DATA_COORDFILE + ] + self.assertTrue("basgrd points= 9806" in cosmofile[5]) + + # check write-out + out_path = os.path.join(self._test_dir, "paracetamol_conf1_CosmoFile") + with open(out_path, "w") as f: + f.writelines(cosmofile) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 132018) + + out_path = os.path.join(self._test_dir, "paracetamole_conf1_CoordFile") + with open(out_path, "w") as f: + f.writelines(coordfile) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 13544) + + def test_Turbomole_run_jobex(self): + step_conf = { + _SBE.STEPID: "01_turbomole", + _SBE.STEP_TYPE: _SBE.STEP_TURBOMOLE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load turbomole/73", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 2}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-ri"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _TE.TM_JOBEX_C: 70, + _TE.TM_JOBEX_GCART: 3, + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + _TE.TM_CONFIG_DIR: MAIN_CONFIG["TURBOMOLE_CONFIG"], + _TE.TM_CONFIG_BASENAME: "b97-3c-ri-d3-def2-mtzvp-int-charge", + _TE.TM_CONFIG_COSMO: os.path.join( + MAIN_CONFIG["TURBOMOLE_CONFIG"], "cosmoprep_eps80.tm" + ), + _STE.EXECUTION_MODE: _TE.TM_JOBEX, + }, + }, + } + + os.environ["PARA_ARCH"] = "MPI" + os.environ["PARNODES"] = "3" + tm_step = StepTurbomole(**step_conf) + tm_step.data.compounds = [self._paracetamol_molecule] + + # conformer coordinates should be touched by the execution (this is geo opt) + self.assertListEqual( + list( + tm_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8785, 0.6004, -0.2173], + ) + tm_step.execute() + self.assertListEqual( + list( + tm_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.1919, -3.3229, 0.3518], + ) + cosmofile = tm_step.get_compounds()[0][0][0].get_extra_data()[ + _COE.EXTRA_DATA_COSMOFILE + ] + + self.assertTrue("nspa= 92" in cosmofile[5]) + + # check write-out + out_path = os.path.join(self._test_dir, "paracetamol_conf1_CosmoFile_jobex") + with open(out_path, "w") as f: + f.writelines(cosmofile) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 115864) diff --git a/tests/XTB/__init__.py b/tests/XTB/__init__.py new file mode 100644 index 0000000..8cf1a75 --- /dev/null +++ b/tests/XTB/__init__.py @@ -0,0 +1 @@ +from tests.XTB.test_XTB_confgen import Test_XTB_confgen diff --git a/tests/XTB/test_XTB_confgen.py b/tests/XTB/test_XTB_confgen.py new file mode 100644 index 0000000..7166b01 --- /dev/null +++ b/tests/XTB/test_XTB_confgen.py @@ -0,0 +1,207 @@ +import unittest +import os + +from icolos.core.workflow_steps.confgen.xtb import StepXTB + +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.enums.program_parameters import XTBEnum + +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + MAIN_CONFIG, + export_unit_test_env_vars, + get_mol_as_Compound, + 
get_ligands_as_compounds_with_conformers, + get_mol_as_Conformer, +) +from icolos.utils.general.files_paths import attach_root_path +import time + +_SBE = StepBaseEnum +_CE = XTBEnum() + + +class Test_XTB_confgen(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/XTB") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + self._paracetamol_molecule = get_mol_as_Compound( + PATHS_EXAMPLEDATA.PARACETAMOL_PATH + ) + self._aspirin_molecule = get_mol_as_Compound(PATHS_EXAMPLEDATA.ASPIRIN_PATH) + self._medium_molecules = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.MEDIUM_MOLECULES_SDF_PATH + ) + + @classmethod + def tearDownClass(cls): + pass + + def test_coordinate_generation(self): + step_conf = { + _SBE.STEPID: "01_conf_genXTB", + _SBE.STEP_TYPE: _SBE.STEP_XTB, + _SBE.EXEC: { + _SBE.EXEC_BINARYLOCATION: attach_root_path( + os.path.join(MAIN_CONFIG["XTBHOME"]) + ), + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 7}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.XTB_OPT: "vtight", + _CE.XTB_GBSA: "h2o", + }, + } + }, + } + xtb_step = StepXTB(**step_conf) + xtb_step.data.compounds = [self._paracetamol_molecule] + confs = get_mol_as_Conformer( + attach_root_path(PATHS_EXAMPLEDATA.PARACETAMOL_MULTIPLE_CONF) + ) + xtb_step.data.compounds[0][0].add_conformers(confs, auto_update=True) + self.assertListEqual( + list( + xtb_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8785, 0.6004, -0.2173], + ) + xtb_step.execute() + self.assertListEqual( + list( + xtb_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8852, 0.6805, -0.1339], + ) + + # check number of conformers returned (only one Compound with only one Enumeration) + self.assertEqual(len(xtb_step.get_compounds()[0][0]), 2) + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join( + self._test_dir, "XTB_conformers_from_OMEGA_paracetamol.sdf" + ) + xtb_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 4414) + + def test_single_core_execution(self): + step_conf = { + _SBE.STEPID: "01_conf_genXTB", + _SBE.STEP_TYPE: _SBE.STEP_XTB, + _SBE.EXEC: { + _SBE.EXEC_BINARYLOCATION: attach_root_path( + os.path.join(MAIN_CONFIG["XTBHOME"]) + ), + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.XTB_OPT: "vtight", + _CE.XTB_GBSA: "h2o", + }, + } + }, + } + xtb_step = StepXTB(**step_conf) + xtb_step.data.compounds = self._medium_molecules + self.assertListEqual( + list( + xtb_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.5065, -0.0698, 5.1132], + ) + xtb_step.execute() + self.assertListEqual( + list( + xtb_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.0964, -0.6968, 4.0397], + ) + + # check number of conformers returned (only one Compound with only one Enumeration) + self.assertEqual(len(xtb_step.get_compounds()[0][0]), 1) + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join( + self._test_dir, "XTB_conformers_from_OMEGA_paracetamol.sdf" + ) + 
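+        # note: the file name above is reused from the paracetamol test; the conformers written here come from the "medium molecules" set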
xtb_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 6874) + + def test_parallel_execution(self): + step_conf = { + _SBE.STEPID: "01_conf_genXTB", + _SBE.STEP_TYPE: _SBE.STEP_XTB, + _SBE.EXEC: { + _SBE.EXEC_BINARYLOCATION: attach_root_path( + os.path.join(MAIN_CONFIG["XTBHOME"]) + ), + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 8}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.XTB_OPT: "vtight", + _CE.XTB_GBSA: "h2o", + }, + } + }, + } + xtb_step = StepXTB(**step_conf) + xtb_step.data.compounds = self._medium_molecules + self.assertListEqual( + list( + xtb_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.5065, -0.0698, 5.1132], + ) + t1 = time.time() + xtb_step.execute() + t2 = time.time() + self.assertLess(t2 - t1, 4) + self.assertListEqual( + list( + xtb_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.0964, -0.6968, 4.0397], + ) + + # check number of conformers returned (only one Compound with only one Enumeration) + self.assertEqual(len(xtb_step.get_compounds()[0][0]), 1) + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join( + self._test_dir, "XTB_conformers_from_OMEGA_paracetamol.sdf" + ) + xtb_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 6874) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/autodockvina/__init__.py b/tests/autodockvina/__init__.py new file mode 100644 index 0000000..1637d7b --- /dev/null +++ b/tests/autodockvina/__init__.py @@ -0,0 +1,2 @@ +from tests.autodockvina.test_adv_docking import * +from tests.autodockvina.test_adv_target_prep import * diff --git a/tests/autodockvina/test_adv_docking.py b/tests/autodockvina/test_adv_docking.py new file mode 100644 index 0000000..662f7e7 --- /dev/null +++ b/tests/autodockvina/test_adv_docking.py @@ -0,0 +1,95 @@ +import os +import unittest + +from icolos.core.workflow_steps.autodockvina.docking import StepAutoDockVina + +from icolos.utils.enums.step_enums import StepBaseEnum, StepAutoDockVinaEnum +from icolos.utils.enums.program_parameters import AutoDockVinaEnum + +from tests.tests_paths import PATHS_1UYD, get_1UYD_ligands_as_Compounds +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SAE = StepAutoDockVinaEnum() +_EE = AutoDockVinaEnum() + + +class Test_ADV_docking(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/ADV") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + self._1UYD_compounds = get_1UYD_ligands_as_Compounds( + abs_path=PATHS_1UYD.LIGANDS + ) + self.receptor_path = PATHS_1UYD.PDBQT_PATH + + def test_ADV_run(self): + step_conf = { + _SBE.STEPID: "01_ADV", + _SBE.STEP_TYPE: _SBE.STEP_AUTODOCKVINA_DOCKING, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load AutoDock_Vina", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 4}, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SAE.CONFIGURATION: { + _SAE.ADV_SEARCH_SPACE: { + _SAE.ADV_SEARCH_SPACE_CENTER_X: 3.3, + 
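+                        # note: only SIZE_Y and SIZE_Z are given below; ADV_SEARCH_SPACE_SIZE_X appears to be missing, so the X extent presumably falls back to a default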
_SAE.ADV_SEARCH_SPACE_CENTER_Y: 11.5, + _SAE.ADV_SEARCH_SPACE_CENTER_Z: 24.8, + _SAE.ADV_SEARCH_SPACE_SIZE_Y: 10, + _SAE.ADV_SEARCH_SPACE_SIZE_Z: 10, + }, + _SAE.NUMBER_POSES: 2, + _SAE.ADV_RECEPTOR_PATH: self.receptor_path, + } + }, + }, + } + + adv_step = StepAutoDockVina(**step_conf) + adv_step.data.compounds = self._1UYD_compounds + + adv_step.execute() + self.assertEqual(len(adv_step.get_compounds()), 15) + self.assertEqual(len(adv_step.get_compounds()[0][0].get_conformers()), 2) + self.assertListEqual( + list( + adv_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [1.953, 10.113, 25.16], + ) + self.assertListEqual( + list( + adv_step.get_compounds()[14][0][1] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [3.682, 15.421, 26.244], + ) + self.assertEqual( + adv_step.get_compounds()[0][0][0] + .get_molecule() + .GetProp(_SBE.ANNOTATION_TAG_DOCKING_SCORE), + "-9.1", + ) + + # check SDF write-out + out_path = os.path.join(self._test_dir, "adv_docked.sdf") + adv_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 105000) diff --git a/tests/autodockvina/test_adv_target_prep.py b/tests/autodockvina/test_adv_target_prep.py new file mode 100644 index 0000000..b8e1c9a --- /dev/null +++ b/tests/autodockvina/test_adv_target_prep.py @@ -0,0 +1,90 @@ +import unittest +import os + +from icolos.core.workflow_steps.autodockvina.target_preparation import ( + StepAutoDockVinaTargetPreparation, +) +from icolos.utils.enums.step_enums import ( + StepBaseEnum, + StepAutoDockVinaTargetPreparationEnum, +) +from icolos.utils.general.files_paths import attach_root_path +from tests.tests_paths import PATHS_1UYD + +_SBE = StepBaseEnum +_SAE = StepAutoDockVinaTargetPreparationEnum() + + +class Test_ADV_target_preparation(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/ADV_target_prep") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + cls.receptor_output_path = os.path.join(cls._test_dir, "ADV_receptor.pdbqt") + + def setUp(self): + self.receptor_input_path = PATHS_1UYD.PDB_PATH + self.reference_ligand_sdf_path = PATHS_1UYD.NATIVE_LIGAND_SDF + self.reference_ligand_pdb_path = PATHS_1UYD.NATIVE_LIGAND_PDB + + @classmethod + def tearDownClass(cls): + pass + + def test_extract_box(self): + step_conf = { + _SBE.STEPID: "01_ADV", + _SBE.STEP_TYPE: _SBE.STEP_AUTODOCKVINA_TARGET_PREPARATION, + _SBE.EXEC: {}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SAE.INPUT_RECEPTOR_PDB: self.receptor_input_path, + _SAE.OUTPUT_RECEPTOR_PDBQT: self.receptor_output_path, + _SAE.EXTRACT_BOX: { + _SAE.EXTRACT_BOX_REFERENCE_LIGAND_PATH: self.reference_ligand_sdf_path, + _SAE.EXTRACT_BOX_REFERENCE_LIGAND_FORMAT: _SAE.EXTRACT_BOX_REFERENCE_LIGAND_FORMAT_SDF, + }, + }, + }, + } + + adv_tp_step = StepAutoDockVinaTargetPreparation(**step_conf) + x_coords, y_coords, z_coords = adv_tp_step._extract_box() + + self.assertEqual(len(x_coords), 28) + self.assertListEqual([4.403, 5.122, 5.091], x_coords[:3]) + self.assertListEqual([15.528, 15.084, 13.786], y_coords[:3]) + self.assertListEqual([26.579, 25.453, 24.846], z_coords[:3]) + + def test_target_preparation(self): + step_conf = { + _SBE.STEPID: "01_ADV", + _SBE.STEP_TYPE: _SBE.STEP_AUTODOCKVINA_TARGET_PREPARATION, + _SBE.EXEC: {}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + 
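+                    # no flags or parameters are required; receptor paths and reference-ligand box extraction are configured under SETTINGS_ADDITIONAL below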
_SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SAE.INPUT_RECEPTOR_PDB: self.receptor_input_path, + _SAE.OUTPUT_RECEPTOR_PDBQT: self.receptor_output_path, + _SAE.EXTRACT_BOX: { + _SAE.EXTRACT_BOX_REFERENCE_LIGAND_PATH: self.reference_ligand_pdb_path, + _SAE.EXTRACT_BOX_REFERENCE_LIGAND_FORMAT: _SAE.EXTRACT_BOX_REFERENCE_LIGAND_FORMAT_PDB, + }, + }, + }, + } + + adv_tp_step = StepAutoDockVinaTargetPreparation(**step_conf) + adv_tp_step.execute() + + # check SDF write-out + stat_inf = os.stat(self.receptor_output_path) + self.assertGreater(stat_inf.st_size, 290000) diff --git a/tests/boltzmann_weighting/__init__.py b/tests/boltzmann_weighting/__init__.py new file mode 100644 index 0000000..6457ffb --- /dev/null +++ b/tests/boltzmann_weighting/__init__.py @@ -0,0 +1 @@ +from tests.boltzmann_weighting.test_boltzmann_weighting import Test_BoltzmannWeighting diff --git a/tests/boltzmann_weighting/test_boltzmann_weighting.py b/tests/boltzmann_weighting/test_boltzmann_weighting.py new file mode 100644 index 0000000..846d7d3 --- /dev/null +++ b/tests/boltzmann_weighting/test_boltzmann_weighting.py @@ -0,0 +1,130 @@ +import unittest +import os + +from icolos.core.containers.compound import Compound, Enumeration +from icolos.core.workflow_steps.calculation.boltzmann_weighting import ( + StepBoltzmannWeighting, +) + +from icolos.utils.enums.step_enums import StepBaseEnum, StepBoltzmannWeightingEnum + +from tests.tests_paths import PATHS_EXAMPLEDATA, get_mol_as_Conformer +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SBWE = StepBoltzmannWeightingEnum() + + +class Test_BoltzmannWeighting(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/BoltzmannWeighting") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + # this file has the necessary properties for the different solvents annotated as tags + self._example_mol_path = ( + PATHS_EXAMPLEDATA.EPSA_BOLTZMANN_WEIGHTING_EXAMPLE_MOLECULE + ) + + @classmethod + def tearDownClass(cls): + pass + + def test_predict_ePSA_with_descriptors(self): + step_conf = { + _SBE.STEPID: "01_boltzmann_weighting", + _SBE.STEP_TYPE: _SBE.STEP_BOLTZMANN_WEIGHTING, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _SBWE.PROPERTIES: [ + { + _SBWE.PROPERTIES_INPUT: "G_h2o", + _SBWE.PROPERTIES_OUTPUT: "boltzfactor_wat", + }, + { + _SBWE.PROPERTIES_INPUT: "G_meoh", + _SBWE.PROPERTIES_OUTPUT: "boltzfactor_meoh", + }, + { + _SBWE.PROPERTIES_INPUT: "G_octanol", + _SBWE.PROPERTIES_OUTPUT: "boltzfactor_octanol", + }, + { + _SBWE.PROPERTIES_INPUT: "G_dmso", + _SBWE.PROPERTIES_OUTPUT: "boltzfactor_dmso", + }, + { + _SBWE.PROPERTIES_INPUT: "G_cychex", + _SBWE.PROPERTIES_OUTPUT: "boltzfactor_cychex", + }, + { + _SBWE.PROPERTIES_INPUT: "G_chcl3", + _SBWE.PROPERTIES_OUTPUT: "boltzfactor_chcl3", + }, + { + _SBWE.PROPERTIES_INPUT: "G_acn", + _SBWE.PROPERTIES_OUTPUT: "boltzfactor_acn", + }, + { + _SBWE.PROPERTIES_INPUT: "G_thf", + _SBWE.PROPERTIES_OUTPUT: "boltzfactor_thf", + }, + ], + _SBWE.WEIGHT: { + _SBWE.WEIGHT_INPUT: [ + "area", + "HB_acc", + "volume", + "HB_don", + "sigma2", + "Gsolv_meoh", + ], + _SBWE.WEIGHT_OUTPUT_PREFIX: "bf_weighted", + _SBWE.WEIGHT_PROPERTIES: [ + "boltzfactor_dmso", + "boltzfactor_wat", + "boltzfactor_meoh", + "boltzfactor_cychex", + ], + }, + } + } + }, + } + bweigh_step = StepBoltzmannWeighting(**step_conf) + 
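+        # build a minimal Compound -> Enumeration -> Conformer hierarchy by hand and attach the tag-annotated conformer loaded in setUp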
bweigh_step.get_compounds().append(Compound()) + bweigh_step.get_compounds()[0].add_enumeration(Enumeration(), auto_update=True) + conformer = get_mol_as_Conformer(self._example_mol_path) + bweigh_step.data.compounds[0][0].add_conformers(conformer, auto_update=True) + bweigh_step.execute() + + self.assertEqual(len(bweigh_step.get_compounds()), 1) + self.assertEqual(len(bweigh_step.get_compounds()[0]), 1) + self.assertEqual(len(bweigh_step.get_compounds()[0][0]), 1) + + self.assertListEqual( + list( + bweigh_step.get_compounds()[0][0] + .get_conformers()[0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [1.9524, -0.9976, -1.5113], + ) + self.assertEqual( + 19, + bweigh_step.get_compounds()[0][0] + .get_conformers()[0] + .get_molecule() + .GetNumAtoms(), + ) + + # check SDF write-out (including Boltzmann-weighted properties as tags) + out_path = os.path.join(self._test_dir, "boltzmann_weighted_annotated.sdf") + bweigh_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 4419) diff --git a/tests/cavity_explorer/__init__.py b/tests/cavity_explorer/__init__.py new file mode 100644 index 0000000..6b32d37 --- /dev/null +++ b/tests/cavity_explorer/__init__.py @@ -0,0 +1 @@ +from tests.cavity_explorer.test_md_pocket import * diff --git a/tests/cavity_explorer/test_md_pocket.py b/tests/cavity_explorer/test_md_pocket.py new file mode 100644 index 0000000..317035d --- /dev/null +++ b/tests/cavity_explorer/test_md_pocket.py @@ -0,0 +1,115 @@ +from icolos.core.containers.generic import GenericData +import unittest +from icolos.utils.enums.step_enums import StepCavExploreEnum, StepBaseEnum +from tests.tests_paths import PATHS_EXAMPLEDATA +from icolos.core.workflow_steps.cavity_explorer.mdpocket import StepMDpocket +from icolos.utils.general.files_paths import attach_root_path + +import os + +_SBE = StepBaseEnum +_SFP = StepCavExploreEnum() + + +class Test_MDPocket(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/cavity_explorer") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + # read in the example files here + self.cav_folder = PATHS_EXAMPLEDATA.CAVITY_TRJ_FOLDER + with open(PATHS_EXAMPLEDATA.CAVITY_DTR_FILE, "rb") as f: + self.dtr_data = f.read() + with open(PATHS_EXAMPLEDATA.MD_POCKET_DESMOND_TOP, "r") as f: + self.desmond_pdb = f.read() + with open(PATHS_EXAMPLEDATA.MDPOCKET_XTC_FILE, "rb") as f: + self.xtc_data = f.read() + with open(PATHS_EXAMPLEDATA.MDPOCKET_PDB_FILE, "r") as f: + self.pdb_file = f.read() + + @classmethod + def tearDownClass(cls): + pass + + def test_MDpocket_desmond(self): + step_conf = { + _SBE.STEPID: "01_cavity_explorer_file_preparation", + _SBE.STEP_TYPE: _SBE.STEP_MDPOCKET, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load fpocket", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: { + _SFP.SELECTION_TEXT: _SFP.PROTEIN, + _SFP.TRAJ_TYPE: "desmond", + } + }, + } + + mdpocket_step = StepMDpocket(**step_conf) + mdpocket_step.data.generic.add_file( + GenericData( + file_name="trj_folder", file_data=self.cav_folder, argument=False + ) + ) + mdpocket_step.data.generic.add_file( + GenericData( + file_name="structure.pdb", file_data=self.desmond_pdb, argument=True + ) + ) + mdpocket_step.data.generic.add_file( + GenericData(file_name="clickme.dtr", file_data=self.dtr_data, argument=True) + ) + 
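+        # all inputs are staged as generic files: the trajectory folder with argument=False, the topology PDB and the .dtr file as arguments (argument=True)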
mdpocket_step.execute() + + out_path = os.path.join(self._test_dir, "pocket_0_descriptors.txt") + mdpocket_step.write_generic_by_extension(self._test_dir, "txt") + + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 187400) + + def test_MDpocket_xtc(self): + step_conf = { + _SBE.STEPID: "01_cavity_explorer_file_preparation", + _SBE.STEP_TYPE: _SBE.STEP_MDPOCKET, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load fpocket", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 4, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 2, # this will be automatically overwritten + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: { + _SFP.SELECTION_TEXT: _SFP.PROTEIN, + _SFP.TRAJ_TYPE: "gromacs", + } + }, + } + + mdpocket_step = StepMDpocket(**step_conf) + + mdpocket_step.data.generic.add_file( + GenericData( + file_name="structure.xtc", file_data=self.xtc_data, argument=True + ) + ) + mdpocket_step.data.generic.add_file( + GenericData( + file_name="structure.pdb", file_data=self.pdb_file, argument=True + ) + ) + mdpocket_step.execute() + + out_path = os.path.join(self._test_dir, "pocket_1_descriptors.txt") + mdpocket_step.write_generic_by_name(self._test_dir, "pocket_1_descriptors.txt") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 700) diff --git a/tests/clustering/__init__.py b/tests/clustering/__init__.py new file mode 100644 index 0000000..969d71d --- /dev/null +++ b/tests/clustering/__init__.py @@ -0,0 +1 @@ +from tests.clustering.test_clustering import * diff --git a/tests/clustering/test_clustering.py b/tests/clustering/test_clustering.py new file mode 100644 index 0000000..4b38264 --- /dev/null +++ b/tests/clustering/test_clustering.py @@ -0,0 +1,91 @@ +import unittest + +from icolos.core.containers.compound import Compound, Enumeration +from icolos.core.workflow_steps.calculation.clustering import StepClustering + +from icolos.utils.enums.step_enums import StepBaseEnum, StepClusteringEnum + +from tests.tests_paths import PATHS_EXAMPLEDATA, get_mol_as_Conformer + +_SBE = StepBaseEnum +_SC = StepClusteringEnum() + + +class Test_Clustering(unittest.TestCase): + @classmethod + def setUpClass(cls): + pass + + def setUp(self): + pass + + @classmethod + def tearDownClass(cls): + pass + + def test_Clustering(self): + step_conf = { + _SBE.STEPID: "01_clustering", + _SBE.STEP_TYPE: _SBE.STEP_CLUSTERING, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _SC.N_CLUSTERS: 3, + _SC.MAX_ITER: 300, + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SC.TOP_N_PER_SOLVENT: 3, + _SC.FEATURES: ["area", "dipole", "HB_acc"], + _SC.FREE_ENERGY_SOLVENT_TAGS: [ + "G_h2o", + "G_meoh", + "G_octanol", + "G_dmso", + "G_cychex", + "G_acn", + "G_thf", + ], + }, + }, + } + + cl_step = StepClustering(**step_conf) + cl_step.get_compounds().append(Compound(compound_number=1)) + cl_step.get_compounds()[0].add_enumeration(Enumeration(), auto_update=True) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + cl_step.data.compounds[0][0].add_conformers(conformers, auto_update=True) + + # 11 conformers are put in, but due to clustering only 10 should come out; note, that if only one solvent was + # selected, only 9 would be outputted (this is because 2 of the clusters have 4 members and TOP_N_PER_SOLVENT + # is set to 3) + self.assertEqual(len(cl_step.get_compounds()[0][0].get_conformers()), 11) + cl_step.execute() + 
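+        # with a single solvent, the two 4-member clusters would each be trimmed to TOP_N_PER_SOLVENT = 3, i.e. 3 + 3 + 3 = 9 conformers; taking the union of the top-3 picks across all solvent tags retains one extra member overall, hence 10 of the 11 survive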
self.assertEqual(len(cl_step.get_compounds()[0][0].get_conformers()), 10) + + # make sure it is the 10th element (index 9) that has been removed + self.assertListEqual( + [ + list( + cl_step.get_compounds()[0][0] + .get_conformers()[i] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ) + for i in range(10) + ], + [ + [0.8838, 0.6808, -0.1373], + [-4.2269, -0.441, 0.2359], + [-4.1693, -0.532, -0.0567], + [-4.2326, -0.4701, 0.3534], + [-4.201, -0.5446, 0.131], + [-4.2198, -0.4705, 0.1656], + [-4.2318, -0.444, 0.2474], + [-4.2316, -0.14, 0.0848], + [-4.1953, -0.1989, -0.1017], + [-4.1528, -0.0208, 0.0932], + ], + ) diff --git a/tests/composite_agents/__init__.py b/tests/composite_agents/__init__.py new file mode 100644 index 0000000..10c612c --- /dev/null +++ b/tests/composite_agents/__init__.py @@ -0,0 +1 @@ +from tests.composite_agents.test_workflow import * diff --git a/tests/composite_agents/test_workflow.py b/tests/composite_agents/test_workflow.py new file mode 100644 index 0000000..37abeb1 --- /dev/null +++ b/tests/composite_agents/test_workflow.py @@ -0,0 +1,451 @@ +import unittest +import os +from rdkit import Chem + +from icolos.core.composite_agents.workflow import WorkFlow + +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.enums.composite_agents_enums import WorkflowEnum +from icolos.utils.enums.program_parameters import OMEGAEnum +from icolos.utils.enums.program_parameters import XTBEnum +from icolos.utils.enums.program_parameters import CrestEnum +from icolos.utils.enums.program_parameters import TurbomoleEnum +from icolos.utils.enums.program_parameters import PantherEnum +from icolos.core.steps_utils import initialize_step_from_dict + +from tests.tests_paths import PATHS_EXAMPLEDATA, MAIN_CONFIG +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_WE = WorkflowEnum() +_OE = OMEGAEnum() +_XE = XTBEnum() +_CE = CrestEnum() +_TE = TurbomoleEnum() +_PE = PantherEnum() + + +class Test_workflow(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/workflow") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + _paracetamol_path = PATHS_EXAMPLEDATA.PARACETAMOL_PATH + mol_supplier = Chem.SDMolSupplier(_paracetamol_path, removeHs=False) + for mol in mol_supplier: + self._molecule = mol + + # TODO: move header variables to MAIN_CONFIG + self._HEADER_EXPORT = { + _WE.ENVIRONMENT_EXPORT: [ + { + _WE.ENVIRONMENT_EXPORT_KEY: "OE_LICENSE", + _WE.ENVIRONMENT_EXPORT_VALUE: "/opt/scp/software/oelicense/1.0/oe_license.seq1", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "XTBHOME", + _WE.ENVIRONMENT_EXPORT_VALUE: "/opt/scp/services/reinvent/Icolos/binaries/xtb-6.3.2", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "XTBPATH", + _WE.ENVIRONMENT_EXPORT_VALUE: "${XTBHOME}/share/xtb", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "PATH", + _WE.ENVIRONMENT_EXPORT_VALUE: "${PATH}:${XTBHOME}/bin", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "PKG_CONFIG_PATH", + _WE.ENVIRONMENT_EXPORT_VALUE: "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig", + }, + ] + } + + @classmethod + def tearDownClass(cls): + pass + + def test_workflow_initialization(self): + conf = { + _WE.HEADER: {_WE.ID: "test_workflow", _WE.ENVIRONMENT: self._HEADER_EXPORT}, + _WE.STEPS: [ + { + _SBE.STEPID: "crest_confgen", + _SBE.STEP_TYPE: _SBE.STEP_CREST, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: None, + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["CREST_BINARY_LOCATION"], + _SBE.EXEC_PARALLELIZATION: 
{_SBE.EXEC_PARALLELIZATION_CORES: 7}, + }, + }, + { + _SBE.STEPID: "omega_confgen", + _SBE.STEP_TYPE: _SBE.STEP_OMEGA, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load omega"}, + }, + ], + } + wflow = WorkFlow(**conf) + wflow.initialize() + self.assertEqual(len(wflow.steps), 2) + wflow.add_step( + initialize_step_from_dict( + { + _SBE.STEPID: "omega_confgen2", + _SBE.STEP_TYPE: _SBE.STEP_OMEGA, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load omega"}, + } + ) + ) + self.assertEqual(len(wflow.steps), 2) + self.assertEqual(len(wflow.get_steps()), 3) + + def test_workflow_with_global_variables(self): + out_path = os.path.join(self._test_dir, "global_variables_out.sdf") + conf = { + _WE.HEADER: { + _WE.ID: "test_workflow", + _WE.DESCRIPTION: "this is a test description", + _WE.ENVIRONMENT: self._HEADER_EXPORT, + _WE.GLOBAL_VARIABLES: { + "root_dir": attach_root_path(""), + }, + }, + _WE.STEPS: [ + { + _SBE.STEPID: "01_initialization", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: PATHS_EXAMPLEDATA.PARACETAMOL_PATH, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SDF, + } + ] + }, + } + ], + } + wflow = WorkFlow(**conf) + wflow.initialize() + wflow.execute() + + def test_workflow_execution(self): + conf = { + _WE.HEADER: { + _WE.ID: "test_workflow", + _WE.DESCRIPTION: "this is a test description", + _WE.ENVIRONMENT: self._HEADER_EXPORT, + }, + _WE.STEPS: [ + { + _SBE.STEPID: "01a_initialization", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: PATHS_EXAMPLEDATA.PARACETAMOL_PATH, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SDF, + } + ] + }, + }, + { + _SBE.STEPID: "01b_initialization", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: PATHS_EXAMPLEDATA.ASPIRIN_PATH, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SDF, + } + ] + }, + }, + { + _SBE.STEPID: "02_omega_confgen", + _SBE.STEP_TYPE: _SBE.STEP_OMEGA, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load omega"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _OE.CLASSIC_MAXCONFS: 10, + _OE.CLASSIC_RMS: 0.0, + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "01a_initialization", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + }, + { + _SBE.INPUT_SOURCE: "01b_initialization", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + }, + ], + _SBE.INPUT_MERGE: { + _SBE.INPUT_MERGE_COMPOUNDS: True, + _SBE.INPUT_MERGE_COMPOUNDS_BY: "id", + _SBE.INPUT_MERGE_ENUMERATIONS: True, + _SBE.INPUT_MERGE_ENUMERATIONS_BY: "id", + }, + }, + }, + { + _SBE.STEPID: "02_conf_gen_crest", + _SBE.STEP_TYPE: _SBE.STEP_CREST, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: None, + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["CREST_BINARY_LOCATION"], + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 7}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-niceprint"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.CREST_OPT: "normal", + _CE.CREST_G: "h2o", + _CE.CREST_RTHR: 0.5, + _CE.CREST_ETHR: 0.25, + _CE.CREST_EWIN: 8.0, + _CE.CREST_PTHR: 0.4, + _CE.CREST_BTHR: 0.02, + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "01a_initialization", + 
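+                            # note: this CREST step consumes only the paracetamol input (01a); the OMEGA step above merged both 01a and 01b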
_SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + } + ] + }, + }, + { + _SBE.STEPID: "01_conf_genXTB", + _SBE.STEP_TYPE: _SBE.STEP_XTB, + _SBE.EXEC: { + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["XTBHOME"], + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 7}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _XE.XTB_OPT: "vtight", + _XE.XTB_GBSA: "h2o", + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "02_omega_confgen", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + }, + { + _SBE.INPUT_SOURCE: "02_conf_gen_crest", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + }, + ], + _SBE.INPUT_MERGE: { + _SBE.INPUT_MERGE_COMPOUNDS: True, + _SBE.INPUT_MERGE_COMPOUNDS_BY: "id", + _SBE.INPUT_MERGE_ENUMERATIONS: True, + _SBE.INPUT_MERGE_ENUMERATIONS_BY: "id", + }, + }, + }, + ], + } + wflow = WorkFlow(**conf) + wflow.initialize() + wflow.execute() + + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join(self._test_dir, "02a_omega_confgen.sdf") + wflow[2].write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreaterEqual(stat_inf.st_size, 4252) + out_path = os.path.join(self._test_dir, "02b_crest_confgen.sdf") + wflow[3].write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreaterEqual(stat_inf.st_size, 47156) + out_path = os.path.join(self._test_dir, "03_XTB_from_omega_and_crest.sdf") + wflow[4].write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreaterEqual(stat_inf.st_size, 52807) + + def test_ePSA_workflow_execution(self): + conf = { + _WE.HEADER: { + _WE.ID: "test_workflow", + _WE.DESCRIPTION: "this is a test description", + _WE.ENVIRONMENT: self._HEADER_EXPORT, + }, + _WE.STEPS: [ + { + _SBE.STEPID: "01_initialization_paracetamol", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: PATHS_EXAMPLEDATA.PARACETAMOL_PATH, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SDF, + } + ] + }, + }, + { + _SBE.STEPID: "01_initialization_aspirin", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: PATHS_EXAMPLEDATA.ASPIRIN_PATH, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SDF, + } + ] + }, + }, + { + _SBE.STEPID: "02_omega_confgen", + _SBE.STEP_TYPE: _SBE.STEP_OMEGA, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load omega"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _OE.CLASSIC_MAXCONFS: 200, + _OE.CLASSIC_RMS: 0.0, + _OE.CLASSIC_CANON_ORDER: "false", + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "01_initialization_paracetamol", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + }, + { + _SBE.INPUT_SOURCE: "01_initialization_aspirin", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + }, + ] + }, + }, + { + _SBE.STEPID: "03_conf_optXTB", + _SBE.STEP_TYPE: _SBE.STEP_XTB, + _SBE.EXEC: { + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["XTBHOME"], + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 7}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _XE.XTB_OPT: "vtight", + _XE.XTB_GBSA: "h2o", + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + 
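+                            # steps are chained by step id rather than by file path: this XTB step consumes the conformers produced by "02_omega_confgen"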
_SBE.INPUT_SOURCE: "02_omega_confgen", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + } + ] + }, + }, + { + _SBE.STEPID: "04_turbomole", + _SBE.STEP_TYPE: _SBE.STEP_TURBOMOLE, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load turbomole/73"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _TE.TM_CONFIG_DIR: "/projects/cc/mai/material/Icolos/turbomole_config", + _TE.TM_CONFIG_BASENAME: "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", + _TE.TM_CONFIG_COSMO: "/projects/cc/mai/material/Icolos/turbomole_config/cosmoprep_eps80.tm", + }, + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "03_conf_optXTB", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + } + ] + }, + }, + { + _SBE.STEPID: "05_cosmo", + _SBE.STEP_TYPE: _SBE.STEP_COSMO, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load COSMOtherm/19.0.4" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "04_turbomole", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + } + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_SDF, + _SBE.WRITEOUT_DESTINATION_RESOURCE: os.path.join( + self._test_dir, "05_cosmo_ePSA_workflow.sdf" + ), + }, + } + ], + }, + ], + } + wflow = WorkFlow(**conf) + wflow.initialize() + wflow.execute() + + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join(self._test_dir, "02_omega_confgen.sdf") + wflow.find_step_by_step_id("02_omega_confgen").write_conformers(out_path) + self.assertGreater(8200, os.stat(out_path).st_size) + out_path = os.path.join(self._test_dir, "03_conf_optXTB.sdf") + wflow.find_step_by_step_id("03_conf_optXTB").write_conformers(out_path) + self.assertGreater(8200, os.stat(out_path).st_size) + out_path = os.path.join(self._test_dir, "04_turbomole.sdf") + wflow.find_step_by_step_id("04_turbomole").write_conformers(out_path) + self.assertGreater(82008, os.stat(out_path).st_size) + out_path = os.path.join(self._test_dir, "05_cosmo_ePSA_workflow.sdf") + self.assertGreater(12500, os.stat(out_path).st_size) diff --git a/tests/containers/__init__.py b/tests/containers/__init__.py new file mode 100644 index 0000000..1d0ae4d --- /dev/null +++ b/tests/containers/__init__.py @@ -0,0 +1,3 @@ +from tests.containers.test_compound import * +from tests.containers.test_generic import * +from tests.containers.test_perturbation_map import * diff --git a/tests/containers/test_compound.py b/tests/containers/test_compound.py new file mode 100644 index 0000000..0de273f --- /dev/null +++ b/tests/containers/test_compound.py @@ -0,0 +1,113 @@ +import unittest +import os +from copy import deepcopy +from rdkit import Chem + +from icolos.core.containers.compound import Conformer, Enumeration, Compound + +from icolos.utils.enums.compound_enums import ( + CompoundContainerEnum, + EnumerationContainerEnum, +) + +from tests.tests_paths import PATHS_EXAMPLEDATA +from icolos.utils.general.files_paths import attach_root_path + + +class Test_Compound(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._CC = CompoundContainerEnum() + cls._EC = 
EnumerationContainerEnum() + + cls._test_dir = attach_root_path("tests/junk/Compound") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + comp = Compound(name="test_molecule", compound_number=0) + enum1 = Enumeration(smile="", molecule=None) + self.e1_conf1 = Conformer(conformer_id=0) + self.e1_conf2 = Conformer(conformer_id=2) + enum1.add_conformer(self.e1_conf1, auto_update=True) + enum1.add_conformer(self.e1_conf2, auto_update=True) + enum2 = Enumeration(smile="", molecule=None) + self.e2_conf1 = Conformer(conformer_id=1) + self.e2_conf2 = Conformer(conformer_id=3) + self.e2_conf3 = Conformer(conformer_id=5) + enum2.add_conformer(self.e2_conf1, auto_update=False) + enum2.add_conformer(self.e2_conf2, auto_update=False) + enum2.add_conformer(self.e2_conf3, auto_update=False) + enum3 = Enumeration(smile="CCC", molecule=None, enumeration_id=4) + self.e3_conf1 = Conformer(conformer_id=0) + enum3.add_conformer(self.e3_conf1, auto_update=True) + comp.add_enumeration(enumeration=enum1, auto_update=True) + comp.add_enumeration(enumeration=enum2, auto_update=True) + comp.add_enumeration(enumeration=enum3, auto_update=False) + self.comp = comp + self.enum1 = enum1 + self.enum2 = enum2 + self.enum3 = enum3 + + _paracetamol_path = attach_root_path(PATHS_EXAMPLEDATA.PARACETAMOL_PATH) + mol_supplier = Chem.SDMolSupplier(_paracetamol_path, removeHs=False) + for mol in mol_supplier: + self._paracetamol_molecule = mol + _aspirin_path = attach_root_path(PATHS_EXAMPLEDATA.ASPIRIN_PATH) + mol_supplier = Chem.SDMolSupplier(_aspirin_path, removeHs=False) + for mol in mol_supplier: + self._aspirin_molecule = mol + + @classmethod + def tearDownClass(cls): + pass + + def test_general_handling(self): + # Enumeration + self.assertEqual(len(self.comp), 3) + l_enums = self.comp.get_enumerations() + self.assertEqual(l_enums[0].get_compound_object(), self.comp) + self.assertEqual(l_enums[1].get_enumeration_id(), 1) + self.assertIsNone(l_enums[2].get_compound_object()) + self.assertEqual(l_enums[2].get_enumeration_id(), 4) + self.assertEqual(self.comp[2].get_enumeration_id(), 4) + + self.assertRaises(IndexError, self.comp.find_enumeration, 3) + self.assertEqual( + self.comp.find_enumeration(enumeration_id=4).get_smile(), "CCC" + ) + + self.assertListEqual([0, 1, 4], self.comp.get_enumeration_ids()) + self.comp.reset_enumeration_ids() + self.assertListEqual([0, 1, 2], self.comp.get_enumeration_ids()) + + # Conformer + self.assertEqual(len(self.comp.find_enumeration(1)), 3) + self.assertEqual(self.comp[1][1].get_conformer_id(), 3) + self.assertListEqual([0, 1], self.comp[0].get_conformer_ids()) + self.assertListEqual([1, 3, 5], self.comp[1].get_conformer_ids()) + + # Deletion + self.comp[1].clear_conformers() + self.assertEqual(len(self.comp[1]), 0) + self.comp.clear_enumerations() + self.assertEqual(len(self.comp), 0) + + def test_cloning_and_resetting(self): + comp_clone = deepcopy(self.comp) + comp_clone[0].set_enumeration_id(10) + self.assertListEqual([0, 1, 4], self.comp.get_enumeration_ids()) + self.assertListEqual([10, 1, 4], comp_clone.get_enumeration_ids()) + + all_conf_ids = [] + for enum in self.comp: + for conf in enum: + all_conf_ids.append(conf.get_conformer_id()) + self.assertListEqual([0, 1, 1, 3, 5, 0], all_conf_ids) + + comp_clone.reset_all_ids() + all_conf_ids = [] + for enum in comp_clone: + for conf in enum: + all_conf_ids.append(conf.get_conformer_id()) + self.assertListEqual([0, 1, 0, 1, 2, 0], all_conf_ids) diff --git a/tests/containers/test_generic.py 
b/tests/containers/test_generic.py new file mode 100644 index 0000000..207a5d6 --- /dev/null +++ b/tests/containers/test_generic.py @@ -0,0 +1,32 @@ +import unittest +import os + +from icolos.core.containers.generic import GenericData, GenericContainer + +from tests.tests_paths import PATHS_EXAMPLEDATA +from icolos.utils.general.files_paths import attach_root_path + + +class Test_Generic(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._GC = GenericContainer() + + cls._test_dir = attach_root_path("tests/junk/Generic") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + gc = GenericContainer() + with open(PATHS_EXAMPLEDATA.FEP_PLUS_DOCKING_PV, "rb") as f: + data = f.read() + gc.add_file( + GenericData(file_name="test_file.txt", file_data=data, argument=True) + ) + self.generic = gc + + def test_GenericHandling(self): + self.assertEqual(len(self.generic.get_flattened_files()), 1) + self.assertEqual( + self.generic.get_file_by_name("test_file.txt").get_extension(), "txt" + ) diff --git a/tests/containers/test_perturbation_map.py b/tests/containers/test_perturbation_map.py new file mode 100644 index 0000000..b15ba60 --- /dev/null +++ b/tests/containers/test_perturbation_map.py @@ -0,0 +1,44 @@ +from icolos.core.containers.perturbation_map import PerturbationMap +import unittest +import os +from icolos.core.containers.generic import GenericData +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + construct_full_compound_object, +) +from icolos.utils.general.files_paths import attach_root_path + + +class Test_PerturbationMap(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + + cls._test_dir = attach_root_path("tests/junk/perturbation_map") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + compounds = construct_full_compound_object( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS, + ) + with open(PATHS_EXAMPLEDATA.FEP_PLUS_PROTEIN, "r") as f: + data = f.read() + protein = GenericData(file_name="protein.pdb", file_data=data) + p_map = PerturbationMap(compounds=compounds, protein=protein) + + p_map.parse_map_file(PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG) + self.p_map = p_map + + def test_perturbation_map(self): + self.assertEqual(len(self.p_map.nodes), 38) + self.assertEqual(len(self.p_map.edges), 62) + self.assertEqual( + self.p_map.nodes[5].get_conformer().get_enumeration_object().get_smile(), + "[H]c1nc(N([H])c2c([H])c(C(=O)N([H])[H])c([H])c(N([H])S(=O)(=O)C([H])([H])[H])c2[H])nc(N([H])c2c(Cl)c([H])c([H])c3c2OC([H])([H])O3)c1[H]", + ) + + def test_vis_map(self): + self.p_map.visualise_perturbation_map(self._test_dir) + filepath = os.path.join(self._test_dir, "vmap.html") + stat_inf = os.stat(filepath) + self.assertGreater(stat_inf.st_size, 13300) diff --git a/tests/cosmo/__init__.py b/tests/cosmo/__init__.py new file mode 100644 index 0000000..9010319 --- /dev/null +++ b/tests/cosmo/__init__.py @@ -0,0 +1 @@ +from tests.cosmo.test_Cosmo import * diff --git a/tests/cosmo/test_Cosmo.py b/tests/cosmo/test_Cosmo.py new file mode 100644 index 0000000..6d0eb22 --- /dev/null +++ b/tests/cosmo/test_Cosmo.py @@ -0,0 +1,174 @@ +import unittest +import os + +from icolos.core.workflow_steps.calculation.cosmo import StepCosmo + +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.enums.program_parameters import TurbomoleEnum + +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + export_unit_test_env_vars, + get_mol_as_Compound, + get_mol_as_Conformer, +) +from 
icolos.utils.enums.compound_enums import ConformerContainerEnum +from icolos.utils.general.files_paths import attach_root_path + + +_SBE = StepBaseEnum +_TE = TurbomoleEnum() +_CTE = ConformerContainerEnum() + + +class Test_Cosmo(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/Cosmo") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + # initialize a Compound with 1 Enumeration and 2 Conformers (done by OMEGA) + _paracetamol_molecule = get_mol_as_Compound(PATHS_EXAMPLEDATA.PARACETAMOL_PATH) + conf = get_mol_as_Conformer(PATHS_EXAMPLEDATA.PARACETAMOL_MULTIPLE_CONF)[0] + with open(PATHS_EXAMPLEDATA.PARACETAMOL_COSMO, "r") as f: + cosmofile = f.readlines() + conf.add_extra_data(key=_CTE.EXTRA_DATA_COSMOFILE, data=cosmofile) + _paracetamol_molecule[0].add_conformer(conf, auto_update=True) + self._paracetamol_molecule = _paracetamol_molecule + + @classmethod + def tearDownClass(cls): + pass + + def test_Cosmo_output_parsing(self): + step_conf = { + _SBE.STEPID: "01_cosmo", + _SBE.STEP_TYPE: _SBE.STEP_COSMO, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load COSMOtherm/20.0.0"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + } + }, + } + cosmo_step = StepCosmo(**step_conf) + cosmo_step.data.compounds = [self._paracetamol_molecule] + cosmo_output_path = PATHS_EXAMPLEDATA.PARACETAMOL_COSMO_OUTPUT + cosmo_step._parse_output( + path_output=cosmo_output_path, conformer=cosmo_step.get_compounds()[0][0][0] + ) + + # test general block + self.assertEqual( + cosmo_step.get_compounds()[0][0][0].get_molecule().GetProp("E_cosmo"), + "-943302.2152", + ) + + # test solvent blocks + self.assertEqual( + cosmo_step.get_compounds()[0][0][0].get_molecule().GetProp("Gsolv_meoh"), + "-24.59517", + ) + self.assertEqual( + cosmo_step.get_compounds()[0][0][0].get_molecule().GetProp("Gsolv_h2o"), + "-23.47666", + ) + self.assertEqual( + cosmo_step.get_compounds()[0][0][0].get_molecule().GetProp("G_propanone"), + "-943303.47354", + ) + try: + self.assertEqual( + cosmo_step.get_compounds()[0][0][0] + .get_molecule() + .GetProp("G_propanonee"), + "", + ) + except KeyError as e: + self.assertEqual("'G_propanonee'", str(e)) + + def test_Cosmo_run(self): + step_conf = { + _SBE.STEPID: "01_cosmo", + _SBE.STEP_TYPE: _SBE.STEP_COSMO, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load COSMOtherm/20.0.0"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _TE.CT_CONFIG: [ + 'ctd = BP_TZVPD_FINE_20.ctd cdir = "/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/CTDATA-FILES" ldir = "/opt/scp/software/COSMOtherm/20.0.0/licensefiles"', + "unit notempty wtln ehfile", + "!! 
generated by COSMOthermX !!", + "f = mol.cosmo", + 'f = "h2o_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/h" VPfile', + 'f = "methanol_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/m" VPfile', + 'f = "1-octanol_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" Comp = "1-octanol" [ VPfile', + 'f = "1-octanol_c1.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1"', + 'f = "1-octanol_c2.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1"', + 'f = "1-octanol_c3.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1"', + 'f = "1-octanol_c4.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1"', + 'f = "1-octanol_c5.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1"', + 'f = "1-octanol_c6.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" ]', + 'f = "dimethylsulfoxide_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/d" VPfile', + 'f = "cyclohexane_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/c" VPfile', + 'f = "chcl3_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/c" VPfile', + 'f = "propanone_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/p" VPfile', + 'f = "acetonitrile_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/a" VPfile', + 'f = "thf_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/t" VPfile', + "henry= 2 tc=25.0 GSOLV", + "henry= 3 tc=25.0 GSOLV", + "henry= 4 tc=25.0 GSOLV", + "henry= 5 tc=25.0 GSOLV", + "henry= 6 tc=25.0 GSOLV", + "henry= 7 tc=25.0 GSOLV", + "henry= 8 tc=25.0 GSOLV", + "henry= 9 tc=25.0 GSOLV", + "henry= 10 tc=25.0 GSOLV", + ] + }, + } + }, + } + cosmo_step = StepCosmo(**step_conf) + cosmo_step.data.compounds = [self._paracetamol_molecule] + + # conformer coordinates should not be touched by the execution + self.assertListEqual( + list( + cosmo_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8785, 0.6004, -0.2173], + ) + cosmo_step.execute() + self.assertListEqual( + list( + cosmo_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8785, 0.6004, -0.2173], + ) + self.assertEqual( + cosmo_step.get_compounds()[0][0][0].get_molecule().GetProp("Gsolv_h2o"), + "-23.47666", + ) + cosmofile = cosmo_step.get_compounds()[0][0][0].get_extra_data()[ + _CTE.EXTRA_DATA_COSMOFILE + ] + self.assertTrue("basgrd points= 9806" in cosmofile[5]) + + # check write-out + out_path = os.path.join(self._test_dir, "cosmo_output_files.sdf") + cosmo_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 3079) diff --git a/tests/esp_sim/__init__.py b/tests/esp_sim/__init__.py new file mode 100644 index 0000000..2920888 --- /dev/null +++ b/tests/esp_sim/__init__.py @@ -0,0 +1 @@ +from tests.esp_sim.test_esp_sim import * diff --git a/tests/esp_sim/test_esp_sim.py b/tests/esp_sim/test_esp_sim.py new file mode 100644 index 0000000..9dd1275 --- /dev/null +++ b/tests/esp_sim/test_esp_sim.py @@ -0,0 +1,85 @@ +import unittest +from 
icolos.core.workflow_steps.calculation.electrostatics.esp_sim import StepEspSim + +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import export_unit_test_env_vars + +_SBE = StepBaseEnum() + + +class Test_EspSim(unittest.TestCase): + @classmethod + def setUpClass(cls): + export_unit_test_env_vars() + + def setUp(self): + pass + + @classmethod + def tearDownClass(cls): + pass + + def test_initialize_compound_SDF(self): + step_conf = { + _SBE.STEPID: "01_esp_sim", + _SBE.STEP_TYPE: _SBE.STEP_ESP_SIM, + _SBE.EXEC: { + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 3}, + }, + _SBE.SETTINGS: {_SBE.SETTINGS_ADDITIONAL: {"ref_smiles": "C(C(C(=O)O)O)O"}}, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "C1=CC=C(C=C1)C(C(=O)O)O", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STRING, + } + ] + }, + } + step_esp_sim = StepEspSim(**step_conf) + step_esp_sim.generate_input() + step_esp_sim.execute() + + esp_sim_score = [ + 0.8112564566774974, + 0.7940316946620978, + 0.8157010968264732, + 0.6927039160490105, + 0.6709748529493742, + 0.3780220716995563, + 0.7933792682013576, + 0.7672803082385128, + ] + + shape_sim_score = [ + 0.6419844502036283, + 0.9525606469002695, + 0.5686465433300876, + 0.5986955029179539, + 0.5460218408736349, + 0.5232662864004803, + 0.8305164319248827, + 0.7283643892339544, + ] + + for i in range(len(esp_sim_score)): + self.assertEqual( + step_esp_sim.data.compounds[i] + .get_enumerations()[0] + .get_conformers()[0] + .get_molecule() + .GetProp("esp_sim"), + str(esp_sim_score[i]), + ) + self.assertEqual( + step_esp_sim.data.compounds[i] + .get_enumerations()[0] + .get_conformers()[0] + .get_molecule() + .GetProp("shape_sim"), + str(shape_sim_score[i]), + ) diff --git a/tests/feature_counter/__init__.py b/tests/feature_counter/__init__.py new file mode 100644 index 0000000..f3e9aec --- /dev/null +++ b/tests/feature_counter/__init__.py @@ -0,0 +1 @@ +from tests.feature_counter.test_feature_counter import Test_FeatureCounter diff --git a/tests/feature_counter/test_feature_counter.py b/tests/feature_counter/test_feature_counter.py new file mode 100644 index 0000000..04c501c --- /dev/null +++ b/tests/feature_counter/test_feature_counter.py @@ -0,0 +1,102 @@ +import unittest + +from icolos.core.containers.compound import Compound, Enumeration +from icolos.core.workflow_steps.calculation.feature_counter import StepFeatureCounter +from icolos.utils.enums.program_parameters import FeatureCounterEnum + +from icolos.utils.enums.step_enums import StepBaseEnum, StepFeatureCounterEnum + +from tests.tests_paths import PATHS_EXAMPLEDATA, get_mol_as_Conformer + +_SBE = StepBaseEnum +_FC = FeatureCounterEnum() +_SFC = StepFeatureCounterEnum() + + +class Test_FeatureCounter(unittest.TestCase): + @classmethod + def setUpClass(cls): + pass + + def setUp(self): + comp0 = Compound(compound_number=0) + comp1 = Compound(compound_number=1) + comp0.add_enumeration(Enumeration(), auto_update=True) + comp1.add_enumeration(Enumeration(), auto_update=True) + comp0[0].add_conformers( + get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS), auto_update=True + ) + comp1[0].add_conformers( + get_mol_as_Conformer(PATHS_EXAMPLEDATA.MEDIUM_MOLECULES_SDF_PATH), + auto_update=True, + ) + self.comp0 = comp0 + self.comp1 = comp1 + + @classmethod + def tearDownClass(cls): + pass + + def test_ring_counting(self): + step_conf = { + 
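+            # minimal configuration: no EXEC block is needed and the feature to count is selected via SETTINGS_ADDITIONAL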
_SBE.STEPID: "01_feature_counting", + _SBE.STEP_TYPE: _SBE.STEP_FEATURE_COUNTER, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {_SFC.FEATURE: _FC.PROPERTY_NUM_RINGS}, + }, + } + + fc_step = StepFeatureCounter(**step_conf) + fc_step.data.compounds = [self.comp0, self.comp1] + + fc_step.execute() + + self.assertEqual( + fc_step.get_compounds()[0][0][0] + .get_molecule() + .GetProp(_FC.PROPERTY_NUM_RINGS), + "2", + ) + self.assertEqual( + fc_step.get_compounds()[1][0][1] + .get_molecule() + .GetProp(_FC.PROPERTY_NUM_RINGS), + "2", + ) + + def test_aromatic_ring_counting(self): + step_conf = { + _SBE.STEPID: "01_feature_counting", + _SBE.STEP_TYPE: _SBE.STEP_FEATURE_COUNTER, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SFC.FEATURE: _FC.PROPERTY_NUM_AROMATIC_RINGS + }, + }, + } + + fc_step = StepFeatureCounter(**step_conf) + fc_step.data.compounds = [self.comp0, self.comp1] + + fc_step.execute() + + self.assertEqual( + fc_step.get_compounds()[0][0][0] + .get_molecule() + .GetProp(_FC.PROPERTY_NUM_AROMATIC_RINGS), + "2", + ) + self.assertEqual( + fc_step.get_compounds()[1][0][1] + .get_molecule() + .GetProp(_FC.PROPERTY_NUM_AROMATIC_RINGS), + "1", + ) diff --git a/tests/flow_control/__init__.py b/tests/flow_control/__init__.py new file mode 100644 index 0000000..4d83148 --- /dev/null +++ b/tests/flow_control/__init__.py @@ -0,0 +1 @@ +from tests.flow_control.test_iterator import * diff --git a/tests/flow_control/test_iterator.py b/tests/flow_control/test_iterator.py new file mode 100644 index 0000000..89807b6 --- /dev/null +++ b/tests/flow_control/test_iterator.py @@ -0,0 +1,197 @@ +import unittest +from icolos.core.flow_control.iterator import StepIterator +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.step_enums import StepBaseEnum, StepTurbomoleEnum +from icolos.utils.general.files_paths import attach_root_path +from icolos.utils.enums.program_parameters import TurbomoleEnum +import os +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + MAIN_CONFIG, +) + +_SBE = StepBaseEnum +_TE = TurbomoleEnum() +_STE = StepTurbomoleEnum() + + +class TestIterator(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/iterator") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self) -> None: + with open(PATHS_EXAMPLEDATA.GROMACS_HOLO_STRUCTURE_GRO, "r") as f: + self.structure = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_TOP, "r") as f: + self.topol = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_TPR, "rb") as f: + self.tpr_file = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_XTC, "rb") as f: + self.xtc_file = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_POSRE, "r") as f: + self.posre = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_LIG_ITP, "r") as f: + self.lig_itp = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_LIG_POSRE, "r") as f: + self.lig_posre = f.read() + + def test_single_initialization(self): + + full_conf = { + "base_config": [ + { + _SBE.STEPID: "01_turbomole", + _SBE.STEP_TYPE: _SBE.STEP_TURBOMOLE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load turbomole/73", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + 
_SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _TE.TM_CONFIG_DIR: MAIN_CONFIG["TURBOMOLE_CONFIG"], + _TE.TM_CONFIG_COSMO: os.path.join( + MAIN_CONFIG["TURBOMOLE_CONFIG"], "cosmoprep_eps80.tm" + ), + _STE.EXECUTION_MODE: _TE.TM_RIDFT, + }, + }, + } + ], + "iter_settings": { + "settings": { + "01_turbomole": { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + _SBE.SETTINGS_ADDITIONAL: { + _TE.TM_CONFIG_BASENAME: [ + "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", + "blyp-ri-d3-def2-svp-int-coarse-charge2", + "some_other_spicy_functional", + ] + }, + } + }, + "iter_mode": "single", + "n_iters": 3, # for now this is manual, should match the number of settings to iterate over + }, + } + + step_iterator = StepIterator(**full_conf) + self.assertEqual(len(step_iterator.initialized_steps), 3) + for i in step_iterator.initialized_steps: + assert isinstance(i, StepBase) + + def test_multi_iter_initialization(self): + + full_conf = { + "base_config": [ + { + _SBE.STEPID: "01_turbomole", + _SBE.STEP_TYPE: _SBE.STEP_TURBOMOLE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load turbomole/73", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _TE.TM_CONFIG_DIR: MAIN_CONFIG["TURBOMOLE_CONFIG"], + _TE.TM_CONFIG_COSMO: os.path.join( + MAIN_CONFIG["TURBOMOLE_CONFIG"], "cosmoprep_eps80.tm" + ), + _TE.TM_CONFIG_BASENAME: "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", + _STE.EXECUTION_MODE: _TE.TM_RIDFT, + }, + }, + } + ], + "iter_settings": { + # no changes in config, just run the same step through multiple times + "iter_mode": "n_iters", + "n_iters": 5, # for now this is manual, should match the number of settings to iterate over + }, + } + + step_iterator = StepIterator(**full_conf) + self.assertEqual(len(step_iterator.initialized_steps), 5) + for i in step_iterator.initialized_steps: + assert isinstance(i, StepBase) + + def test_single_initialization_parallel_execution(self): + """ + Test running multiple steps in parallel + """ + + full_conf = { + "base_config": [ + { + _SBE.STEPID: "test_mmgbsa", + _SBE.STEP_TYPE: _SBE.STEP_GMX_MMPBSA, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2 && module load gmx_MMPBSA/1.3.3-fosscuda-2019a-Python-3.7.2" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "make_ndx_command": "Protein Other", + "pipe_input": "Protein Other", + }, + }, + } + ], + "iter_settings": { + "n_iters": 4, # for now this is manual, should match the number of settings to iterate over + "parallelizer_settings": { + "parallelize": True, + "cores": 4, + "max_lenth_sublists": 1, + }, + }, + } + + step_mmpbsa_job_control = StepIterator(**full_conf) + # step_mmpbsa.data.generic.add_file( + # GenericData(file_name="structure.gro", file_data=self.structure) + # ) + # step_mmpbsa.data.generic.add_file( + # GenericData(file_name="topol.top", file_data=self.topol) + # ) + # step_mmpbsa.data.generic.add_file( + # GenericData(file_name="structure.xtc", file_data=self.xtc_file) + # ) + # step_mmpbsa.data.generic.add_file( + # GenericData(file_name="structure.tpr", file_data=self.tpr_file) + # ) + # step_mmpbsa.data.generic.add_file( + # GenericData(file_name="posre.itp", file_data=self.posre) 
+ # ) + # step_mmpbsa.data.generic.add_file( + # GenericData(file_name="DMP:100.itp", file_data=self.lig_itp) + # ) + # step_mmpbsa.data.generic.add_file( + # GenericData(file_name="posre_DMP:100.itp", file_data=self.lig_posre) + # ) + + # should return JobControl object + assert isinstance(step_mmpbsa_job_control.initialized_steps, StepBase) + # TODO: there isn't really a good way to unit test this, it is a pain to load the data in to the individual steps + # step_mmpbsa_job_control.initialized_steps.execute() diff --git a/tests/gromacs/__init__.py b/tests/gromacs/__init__.py new file mode 100644 index 0000000..566d8ae --- /dev/null +++ b/tests/gromacs/__init__.py @@ -0,0 +1,14 @@ +from tests.gromacs.test_editconf import * +from tests.gromacs.test_genion import * +from tests.gromacs.test_solvate import * +from tests.gromacs.test_mdrun import * +from tests.gromacs.test_pdb2gmx import * +from tests.gromacs.test_grompp import * +from tests.gromacs.test_trjconv import * +from tests.gromacs.test_cluster import * +from tests.gromacs.test_mmpbsa import * +from tests.gromacs.test_cluster_ts import * + +# from tests.gromacs.test_do_dssp import * +from tests.gromacs.test_trjcat import * +from tests.gromacs.test_rmsd import * diff --git a/tests/gromacs/test_cluster.py b/tests/gromacs/test_cluster.py new file mode 100644 index 0000000..26f4a81 --- /dev/null +++ b/tests/gromacs/test_cluster.py @@ -0,0 +1,70 @@ +from icolos.core.containers.generic import GenericData +from icolos.core.workflow_steps.gromacs.cluster import StepGMXCluster +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + +SGE = StepGromacsEnum() +SBE = StepBaseEnum + + +class Test_Cluster(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_XTC), "rb") as f: + self.xtc = f.read() + + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_TPR_TRJCONV), "rb") as f: + self.tpr = f.read() + + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_STRUCTURE_FILE), "r") as f: + self.structure = f.read() + + def test_cluster(self): + step_conf = { + SBE.STEPID: "test_cluster", + SBE.STEP_TYPE: "cluster", + SBE.EXEC: { + SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + SBE.SETTINGS: { + SBE.SETTINGS_ARGUMENTS: { + SBE.SETTINGS_ARGUMENTS_FLAGS: [], + SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-dt": "1000", + "-n": "index.ndx", + }, + }, + SBE.SETTINGS_ADDITIONAL: { + SBE.PIPE_INPUT: "2 System", + SGE.MAKE_NDX_COMMAND: "1 & a P", + }, + }, + } + + step_cluster = StepGMXCluster(**step_conf) + step_cluster.data.generic.add_file( + GenericData(file_name="tmp10249.xtc", file_data=self.xtc, argument=True) + ) + step_cluster.data.generic.add_file( + GenericData(file_name="tmp03942.tpr", file_data=self.tpr, argument=True) + ) + step_cluster.data.generic.add_file( + GenericData( + file_name="structure.gro", file_data=self.structure, argument=True + ) + ) + step_cluster.execute() + out_path = os.path.join(self._test_dir, "clusters.pdb") + step_cluster.write_generic_by_extension(self._test_dir, "pdb") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 2002553) diff --git 
a/tests/gromacs/test_cluster_ts.py b/tests/gromacs/test_cluster_ts.py new file mode 100644 index 0000000..7396ae7 --- /dev/null +++ b/tests/gromacs/test_cluster_ts.py @@ -0,0 +1,55 @@ +from icolos.core.workflow_steps.gromacs.clusters_ts import StepClusterTS +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class Test_ts_cluster(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_TS_CLUSTERS), "r") as f: + self.data = f.read() + + def test_ts_cluster(self): + step_conf = { + _SBE.STEPID: "test_ts_cluster", + _SBE.STEP_TYPE: "ts_cluster", + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load R"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "lengths": "10001", + "clustersNumber": "13", + "mdEngine": "GROMACS", + }, + } + }, + } + + step_ts_cluster = StepClusterTS(**step_conf) + step_ts_cluster.data.generic.add_file( + GenericData( + file_name="clusters_ts_example.xvg", file_data=self.data, argument=True + ) + ) + + step_ts_cluster.execute() + + out_path = os.path.join(self._test_dir, "clusters_ts.png") + step_ts_cluster.write_generic_by_name(self._test_dir, "clusters_ts.png") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 36102) diff --git a/tests/gromacs/test_do_dssp.py b/tests/gromacs/test_do_dssp.py new file mode 100644 index 0000000..fb05dce --- /dev/null +++ b/tests/gromacs/test_do_dssp.py @@ -0,0 +1,54 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.core.workflow_steps.gromacs.do_dssp import StepGMXDoDSSP +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class Test_Editconf(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_DSSP_TPR), "rb") as f: + self.structure = f.read() + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_DSSP_XTC), "rb") as f: + self.traj = f.read() + + def test_editconf_run(self): + step_conf = { + _SBE.STEPID: "test_dssp", + _SBE.STEP_TYPE: "dssp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + } + }, + } + + step_do_dssp = StepGMXDoDSSP(**step_conf) + step_do_dssp.data.generic.add_file( + GenericData(file_name="structure.tpr", file_data=self.structure) + ) + step_do_dssp.data.generic.add_file( + GenericData(file_name="traj.xtc", file_data=self.traj) + ) + step_do_dssp.execute() + out_path = os.path.join(self._test_dir, "info.dat") + 
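+        # write the step's generic output into the junk dir, then assert on its size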
step_do_dssp.write_generic_by_name(self._test_dir, "info.dat") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 22377) diff --git a/tests/gromacs/test_editconf.py b/tests/gromacs/test_editconf.py new file mode 100644 index 0000000..ddb7fe8 --- /dev/null +++ b/tests/gromacs/test_editconf.py @@ -0,0 +1,54 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.core.workflow_steps.gromacs.editconf import StepGMXEditConf +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class Test_Editconf(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_STRUCTURE_FILE), "r") as f: + self.structure = f.read() + + def test_editconf_run(self): + step_conf = { + _SBE.STEPID: "test_editconf", + _SBE.STEP_TYPE: "editconf", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-d": "1.0", + "-bt": "dodecahedron", + }, + } + }, + } + + step_editconf = StepGMXEditConf(**step_conf) + step_editconf.data.generic.add_file( + GenericData( + file_name="structure.gro", file_data=self.structure, argument=True + ) + ) + step_editconf.execute() + out_path = os.path.join(self._test_dir, "structure.gro") + step_editconf.write_generic_by_name(self._test_dir, "structure.gro") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 22377) diff --git a/tests/gromacs/test_genion.py b/tests/gromacs/test_genion.py new file mode 100644 index 0000000..af1ae30 --- /dev/null +++ b/tests/gromacs/test_genion.py @@ -0,0 +1,59 @@ +from icolos.core.containers.generic import GenericData +from icolos.core.workflow_steps.gromacs.genion import StepGMXGenion +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class Test_Genion(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_TOPOL_FILE), "r") as f: + self.topol = f.read() + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_TPR_FILE), "rb") as f: + self.tpr = f.read() + + def test_genion_run(self): + step_conf = { + _SBE.STEPID: "test_genion", + _SBE.STEP_TYPE: "genion", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-neutral"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-pname": "NA", + "-nname": "CL", + }, + }, + _SBE.SETTINGS_ADDITIONAL: {_SBE.PIPE_INPUT: "3"}, + }, + } + + step_genion = StepGMXGenion(**step_conf) + step_genion.data.generic.add_file( + GenericData(file_name="topol.top", file_data=self.topol, 
argument=True) + ) + step_genion.data.generic.add_file( + GenericData(file_name="structure.tpr", file_data=self.tpr, argument=True) + ) + step_genion.execute() + + out_path = os.path.join(self._test_dir, "structure.gro") + step_genion.write_generic_by_name(self._test_dir, "structure.gro") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 874941) diff --git a/tests/gromacs/test_grompp.py b/tests/gromacs/test_grompp.py new file mode 100644 index 0000000..c536750 --- /dev/null +++ b/tests/gromacs/test_grompp.py @@ -0,0 +1,72 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.core.workflow_steps.gromacs.grompp import StepGMXGrompp +from icolos.utils.general.files_paths import attach_root_path + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class Test_Grompp(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(PATHS_EXAMPLEDATA.GROMACS_GROMPP_INPUT_STRUCTURE, "r") as f: + self.structure = f.read() + with open(PATHS_EXAMPLEDATA.GROMACS_IONS_MDP, "r") as f: + self.mdp = f.read() + with open(PATHS_EXAMPLEDATA.GROMACS_GROMPP_TOPOL, "r") as f: + self.topol = f.read() + + def test_grompp(self): + step_conf = { + _SBE.STEPID: "test_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.FIELDS: { + "nsteps": 50, + "-nsteeps": 123, + }, # deliberate typo to check warning + _SGE.FORCEFIELD: "/projects/cc/mai/material/Icolos/forcefields/charmm36-feb2021.ff", + "-r": False, + _SGE.MAKE_NDX_COMMAND: "auto", + }, + }, + } + + step_grompp = StepGMXGrompp(**step_conf) + step_grompp.data.generic.add_file( + GenericData( + file_name="tmp029389.gro", file_data=self.structure, argument=True + ) + ) + step_grompp.data.generic.add_file( + GenericData(file_name="tmp03394.mdp", file_data=self.mdp, argument=True) + ) + step_grompp.data.generic.add_file( + GenericData(file_name="tmp91023.top", file_data=self.topol, argument=True) + ) + + step_grompp.execute() + + out_path = os.path.join(self._test_dir, "structure.tpr") + step_grompp.write_generic_by_name(self._test_dir, "structure.tpr") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 596160) diff --git a/tests/gromacs/test_mdrun.py b/tests/gromacs/test_mdrun.py new file mode 100644 index 0000000..bc8abdd --- /dev/null +++ b/tests/gromacs/test_mdrun.py @@ -0,0 +1,70 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.workflow_steps.gromacs.mdrun import StepGMXMDrun + + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class Test_MDrun(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + 
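+        # export the environment variables the unit tests rely on (see tests_paths)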
export_unit_test_env_vars() + + def setUp(self): + with open(PATHS_EXAMPLEDATA.GROMACS_TPR_FILE, "rb") as f: + self.tpr = f.read() + + def test_mdrun(self): + step_conf = { + _SBE.STEPID: "test_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + } + + step_mdrun = StepGMXMDrun(**step_conf) + step_mdrun.data.generic.add_file( + GenericData(file_name="structure.tpr", file_data=self.tpr, argument=True) + ) + step_mdrun.execute() + + out_path = os.path.join(self._test_dir, "structure.gro") + step_mdrun.write_generic_by_extension(self._test_dir, "gro") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 874941) + + def test_mdrun_slurm(self): + step_conf = { + _SBE.STEPID: "test_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_RESOURCE: "slurm", + _SBE.EXEC_JOB_CONTROL: { + _SBE.EXEC_JOB_CONTROL_PARTITION: "gpu", + _SBE.EXEC_JOB_CONTROL_GRES: "gpu:1", + _SBE.EXEC_JOB_CONTROL_MODULES: ["GROMACS/2020.3-fosscuda-2019a"], + }, + }, + } + + step_mdrun = StepGMXMDrun(**step_conf) + step_mdrun.data.generic.add_file( + GenericData(file_name="structure.tpr", file_data=self.tpr, argument=True) + ) + step_mdrun.execute() + + out_path = os.path.join(self._test_dir, "structure.gro") + step_mdrun.write_generic_by_extension(self._test_dir, "gro") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 874941) diff --git a/tests/gromacs/test_mmpbsa.py b/tests/gromacs/test_mmpbsa.py new file mode 100644 index 0000000..a28d02d --- /dev/null +++ b/tests/gromacs/test_mmpbsa.py @@ -0,0 +1,142 @@ +from icolos.core.workflow_steps.gromacs.mmpbsa import StepGMXmmpbsa +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars, MAIN_CONFIG +from icolos.utils.general.files_paths import attach_root_path +from time import time + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class Test_MMPBSA(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self) -> None: + with open(PATHS_EXAMPLEDATA.GROMACS_HOLO_STRUCTURE_GRO, "r") as f: + self.structure = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_TOP, "r") as f: + self.topol = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_TPR, "rb") as f: + self.tpr_file = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_XTC, "rb") as f: + self.xtc_file = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_POSRE, "rb") as f: + self.posre = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_LIG_ITP, "rb") as f: + self.lig_itp = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_LIG_POSRE, "rb") as f: + self.lig_posre = f.read() + + def test_protein_lig_single_traj(self): + step_conf = { + _SBE.STEPID: "test_gmmpbsa", + _SBE.STEP_TYPE: "gmx_mmpbsa", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a && module load gmx_MMPBSA && module load AmberTools/21-fosscuda-2019a-Python-3.7.2" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.FORCEFIELD: MAIN_CONFIG["FORCEFIELD"], + _SGE.COUPLING_GROUPS: "Protein Other", + }, + }, + } + step_mmpbsa = StepGMXmmpbsa(**step_conf) + 
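+        # attach the structure, topology, trajectory and restraint/ligand itp
+        # files that the gmx_MMPBSA run expects as generic inputs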
step_mmpbsa.data.generic.add_file( + GenericData(file_name="structure.gro", file_data=self.structure) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="topol.top", file_data=self.topol) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="structure.xtc", file_data=self.xtc_file) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="structure.tpr", file_data=self.tpr_file) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="posre.itp", file_data=self.posre) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="DMP:100.itp", file_data=self.lig_itp) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="posre_DMP:100.itp", file_data=self.lig_posre) + ) + step_mmpbsa.execute() + out_path = os.path.join(self._test_dir, "FINAL_RESULTS_MMPBSA.dat") + step_mmpbsa.write_generic_by_extension(self._test_dir, "dat") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 5570) + + def test_protein_lig_single_traj_custom_file(self): + + step_conf = { + _SBE.STEPID: "test_gmmpbsa", + _SBE.STEP_TYPE: "gmx_mmpbsa", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a && module load gmx_MMPBSA && module load AmberTools/21-fosscuda-2019a-Python-3.7.2" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.FORCEFIELD: MAIN_CONFIG["FORCEFIELD"], + _SGE.COUPLING_GROUPS: "Protein Other", + _SGE.INPUT_FILE: PATHS_EXAMPLEDATA.MMPBSA_CUSTOM_INPUT, + "ntasks": 2, + }, + }, + } + step_mmpbsa = StepGMXmmpbsa(**step_conf) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="structure.gro", file_data=self.structure) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="topol.top", file_data=self.topol) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="structure.xtc", file_data=self.xtc_file) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="structure.tpr", file_data=self.tpr_file) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="posre.itp", file_data=self.posre) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="DMP:100.itp", file_data=self.lig_itp) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="posre_DMP:100.itp", file_data=self.lig_posre) + ) + t1 = time() + step_mmpbsa.execute() + exec_time = time() - t1 + print("single traj exec time, custom input", exec_time) + out_path = os.path.join(self._test_dir, "FINAL_RESULTS_MMPBSA.dat") + step_mmpbsa.write_generic_by_extension(self._test_dir, "dat") + stat_inf = os.stat(out_path) + + self.assertGreater(stat_inf.st_size, 4680) diff --git a/tests/gromacs/test_pdb2gmx.py b/tests/gromacs/test_pdb2gmx.py new file mode 100644 index 0000000..641abe8 --- /dev/null +++ b/tests/gromacs/test_pdb2gmx.py @@ -0,0 +1,91 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.workflow_steps.gromacs.pdb2gmx import StepGMXPdb2gmx + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class Test_Pdb2gmx(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + 
export_unit_test_env_vars()
+
+    def setUp(self):
+        with open(PATHS_EXAMPLEDATA.GROMACS_PDB_FILE, "r") as f:
+            self.structure = f.read()
+        with open(PATHS_EXAMPLEDATA.GROMACS_HOLO_STRUCTURE, "r") as f:
+            self.holo_structure = f.read()
+
+    def test_pdb2gmx_run(self):
+        step_conf = {
+            _SBE.STEPID: "test_pdb2gmx",
+            _SBE.STEP_TYPE: "pdb2gmx",
+            _SBE.EXEC: {
+                _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a"
+            },
+            _SBE.SETTINGS: {
+                _SBE.SETTINGS_ARGUMENTS: {
+                    _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-ignh"],
+                    _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {
+                        "-water": "tip4p",
+                        "-ff": "amber03",
+                    },
+                }
+            },
+        }
+
+        step_pdb2gmx = StepGMXPdb2gmx(**step_conf)
+        step_pdb2gmx.data.generic.add_file(
+            GenericData(
+                file_name="structure.pdb", file_data=self.structure, argument=True
+            )
+        )
+        step_pdb2gmx.execute()
+        out_path = os.path.join(self._test_dir, "structure.gro")
+        step_pdb2gmx.write_generic_by_extension(
+            self._test_dir, _SGE.FIELD_KEY_STRUCTURE
+        )
+        stat_inf = os.stat(out_path)
+        self.assertGreater(stat_inf.st_size, 22300)
+
+    def test_lig_param(self):
+        step_conf = {
+            _SBE.STEPID: "test_pdb2gmx_lig_param",
+            _SBE.STEP_TYPE: "pdb2gmx_lig",
+            _SBE.EXEC: {
+                _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a"
+            },
+            _SBE.SETTINGS: {
+                _SBE.SETTINGS_ADDITIONAL: {_SBE.PIPE_INPUT: "echo 3"},
+                _SBE.SETTINGS_ARGUMENTS: {
+                    _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-ignh"],
+                    _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {
+                        "-water": "tip4p",
+                        "-ff": "amber03",
+                    },
+                },
+            },
+        }
+
+        step_lig_param = StepGMXPdb2gmx(**step_conf)
+        step_lig_param.data.generic.add_file(
+            GenericData(
+                file_name="tmp_whatever01923.pdb", file_data=self.holo_structure
+            )
+        )
+        step_lig_param.execute()
+        out_path = os.path.join(self._test_dir, "structure.gro")
+        step_lig_param.write_generic_by_extension(
+            self._test_dir, _SGE.FIELD_KEY_STRUCTURE
+        )
+        stat_inf = os.stat(out_path)
+        self.assertGreater(stat_inf.st_size, 73800)
diff --git a/tests/gromacs/test_removelig.py b/tests/gromacs/test_removelig.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/tests/gromacs/test_removelig.py
@@ -0,0 +1 @@
+
diff --git a/tests/gromacs/test_rmsd.py b/tests/gromacs/test_rmsd.py
new file mode 100644
index 0000000..15d9148
--- /dev/null
+++ b/tests/gromacs/test_rmsd.py
@@ -0,0 +1,55 @@
+from icolos.core.containers.generic import GenericData
+import unittest
+import os
+from icolos.core.workflow_steps.gromacs.rsmd import StepGMXrmsd
+from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum
+from tests.tests_paths import (
+    PATHS_EXAMPLEDATA,
+    export_unit_test_env_vars,
+    get_docked_ligands_as_conformers,
+)
+from icolos.utils.general.files_paths import attach_root_path
+
+SGE = StepGromacsEnum()
+SBE = StepBaseEnum
+
+
+class Test_GMXRmsd(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls._test_dir = attach_root_path("tests/junk/gromacs")
+        if not os.path.isdir(cls._test_dir):
+            os.makedirs(cls._test_dir)
+
+        export_unit_test_env_vars()
+
+    def setUp(self):
+        with open(PATHS_EXAMPLEDATA.GROMACS_DMP_LIGAND_TRJ, "rb") as f:
+            self.xtc = f.read()
+
+        # load the docked pose as a compound
+        self.comp = get_docked_ligands_as_conformers(
+            PATHS_EXAMPLEDATA.GROMACS_DMP_LIGAND_SDF
+        )
+
+    def test_gmx_rmsd(self):
+        step_conf = {
+            SBE.STEPID: "test_gmx_rmsd",
+            SBE.STEP_TYPE: "gmx_rmsd",
+            SBE.EXEC: {
+                SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a"
+            },
+            SBE.SETTINGS: {},
+        }
+
+        step_rmsd = StepGMXrmsd(**step_conf)
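+        # the trajectory is attached as a generic file; the docked pose loaded
+        # in setUp is attached as a compound below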
step_rmsd.data.generic.add_file( + GenericData(file_name="structure.xtc", file_data=self.xtc, argument=True) + ) + step_rmsd.data.compounds = self.comp + + step_rmsd.execute() + out_path = os.path.join(self._test_dir, "rmsd.xvg") + step_rmsd.write_generic_by_extension(self._test_dir, "xvg") + stat_inf = os.stat(out_path) + self.assertAlmostEqual(stat_inf.st_size, 3220, delta=100) diff --git a/tests/gromacs/test_solvate.py b/tests/gromacs/test_solvate.py new file mode 100644 index 0000000..1dd1e7a --- /dev/null +++ b/tests/gromacs/test_solvate.py @@ -0,0 +1,60 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.workflow_steps.gromacs.solvate import StepGMXSolvate + +_SBE = StepBaseEnum +_SGE = StepGromacsEnum() + + +class Test_Solvate(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(PATHS_EXAMPLEDATA.GROMACS_TOPOL_FILE, "r") as f: + self.topol = f.read() + with open(PATHS_EXAMPLEDATA.GROMACS_STRUCTURE_FILE, "r") as f: + self.structure = f.read() + + def test_solvate(self): + step_conf = { + _SBE.STEPID: "test_solvate", + _SBE.STEP_TYPE: "solvate", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + } + }, + } + + step_solvate = StepGMXSolvate(**step_conf) + step_solvate.data.generic.add_file( + GenericData( + file_name="structure.gro", file_data=self.structure, argument=True + ) + ) + step_solvate.data.generic.add_file( + GenericData(file_name="topol.top", file_data=self.topol, argument=True) + ) + + step_solvate.execute() + + out_path = os.path.join(self._test_dir, "structure.gro") + step_solvate.write_generic_by_extension( + self._test_dir, _SGE.FIELD_KEY_STRUCTURE + ) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 650000) diff --git a/tests/gromacs/test_trjcat.py b/tests/gromacs/test_trjcat.py new file mode 100644 index 0000000..ebc3709 --- /dev/null +++ b/tests/gromacs/test_trjcat.py @@ -0,0 +1,56 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.core.workflow_steps.gromacs.trajcat import StepGMXTrjcat +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + +SGE = StepGromacsEnum() +SBE = StepBaseEnum + + +class Test_Trjcat(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(PATHS_EXAMPLEDATA.GROMACS_XTC, "rb") as f: + self.xtc = f.read() + + with open(PATHS_EXAMPLEDATA.GROMACS_TPR_TRJCONV, "rb") as f: + self.tpr = f.read() + + def test_trjconv(self): + step_conf = { + SBE.STEPID: "test_trjcat", + SBE.STEP_TYPE: "trjcat", + SBE.EXEC: { + SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + SBE.SETTINGS: {}, + } + + 
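+        # three copies of the same trajectory are concatenated into a single xtc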
step_trjconv = StepGMXTrjcat(**step_conf) + step_trjconv.data.generic.add_file( + GenericData(file_name="structure.xtc", file_data=self.xtc, argument=True) + ) + step_trjconv.data.generic.add_file( + GenericData(file_name="structure_2.xtc", file_data=self.xtc, argument=True) + ) + step_trjconv.data.generic.add_file( + GenericData(file_name="structure_3.xtc", file_data=self.xtc, argument=True) + ) + # step_trjconv.data.generic.add_file( + # GenericData(file_name="structure.tpr", file_data=self.tpr, argument=True) + # ) + step_trjconv.execute() + out_path = os.path.join(self._test_dir, "trjcat_out.xtc") + step_trjconv.write_generic_by_extension(self._test_dir, "xtc") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 30088548) diff --git a/tests/gromacs/test_trjconv.py b/tests/gromacs/test_trjconv.py new file mode 100644 index 0000000..4969451 --- /dev/null +++ b/tests/gromacs/test_trjconv.py @@ -0,0 +1,53 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.workflow_steps.gromacs.trjconv import StepGMXTrjconv + +SGE = StepGromacsEnum() +SBE = StepBaseEnum + + +class Test_Trjconv(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(PATHS_EXAMPLEDATA.GROMACS_XTC, "rb") as f: + self.xtc = f.read() + + with open(PATHS_EXAMPLEDATA.GROMACS_TPR_TRJCONV, "rb") as f: + self.tpr = f.read() + + def test_trjconv(self): + step_conf = { + SBE.STEPID: "test_trjconv", + SBE.STEP_TYPE: "trjconv", + SBE.EXEC: { + SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + SBE.SETTINGS: { + SBE.SETTINGS_ARGUMENTS_FLAGS: ["-center"], + SBE.SETTINGS_ADDITIONAL: {SBE.PIPE_INPUT: "echo -ne 1 0"}, + }, + } + + step_trjconv = StepGMXTrjconv(**step_conf) + step_trjconv.data.generic.add_file( + GenericData(file_name="structure.xtc", file_data=self.xtc, argument=True) + ) + step_trjconv.data.generic.add_file( + GenericData(file_name="structure.tpr", file_data=self.tpr, argument=True) + ) + step_trjconv.execute() + out_path = os.path.join(self._test_dir, "structure.xtc") + step_trjconv.write_generic_by_extension(self._test_dir, "xtc") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 10029516) diff --git a/tests/integration_tests/__init__.py b/tests/integration_tests/__init__.py new file mode 100644 index 0000000..862a76e --- /dev/null +++ b/tests/integration_tests/__init__.py @@ -0,0 +1,5 @@ +# from tests.integration_tests.test_fep_plus import * +from tests.integration_tests.test_docking import * +from tests.integration_tests.test_gromacs import * + +from tests.integration_tests.test_rmsd_iter import * diff --git a/tests/integration_tests/test_docking.py b/tests/integration_tests/test_docking.py new file mode 100644 index 0000000..a343b0a --- /dev/null +++ b/tests/integration_tests/test_docking.py @@ -0,0 +1,239 @@ +import unittest +import os +from tests.tests_paths import PATHS_EXAMPLEDATA, PATHS_1UYD +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.composite_agents_enums import WorkflowEnum +from 
icolos.utils.enums.step_enums import StepBaseEnum, StepGlideEnum
+
+_WE = WorkflowEnum()
+_SBE = StepBaseEnum
+_SGE = StepGlideEnum()
+
+
+class TestDockingWorkflow(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+
+        cls._test_dir = attach_root_path("tests/junk/integration")
+        if not os.path.isdir(cls._test_dir):
+            os.makedirs(cls._test_dir)
+
+    @classmethod
+    def tearDownClass(cls):
+        pass
+
+    def test_docking_workflow(self):
+
+        conf = {
+            _WE.HEADER: {
+                _WE.ID: "NIBR",
+                _WE.ENVIRONMENT: {
+                    _WE.ENVIRONMENT_EXPORT: [
+                        {
+                            _WE.ENVIRONMENT_EXPORT_KEY: "OE_LICENSE",
+                            _WE.ENVIRONMENT_EXPORT_VALUE: "/opt/scp/software/oelicense/1.0/oe_license.seq1",
+                        },
+                        {
+                            _WE.ENVIRONMENT_EXPORT_KEY: "XTBHOME",
+                            _WE.ENVIRONMENT_EXPORT_VALUE: "/projects/cc/mai/binaries/xtb-6.4.0",
+                        },
+                        {
+                            _WE.ENVIRONMENT_EXPORT_KEY: "XTBPATH",
+                            _WE.ENVIRONMENT_EXPORT_VALUE: "${XTBHOME}/share/xtb",
+                        },
+                        {
+                            _WE.ENVIRONMENT_EXPORT_KEY: "PATH",
+                            _WE.ENVIRONMENT_EXPORT_VALUE: "${PATH}:${XTBHOME}/bin",
+                        },
+                        {
+                            _WE.ENVIRONMENT_EXPORT_KEY: "PKG_CONFIG_PATH",
+                            _WE.ENVIRONMENT_EXPORT_VALUE: "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig",
+                        },
+                        {
+                            _WE.ENVIRONMENT_EXPORT_KEY: "PARA_ARCH",
+                            _WE.ENVIRONMENT_EXPORT_VALUE: "MPI",
+                        },
+                        {
+                            _WE.ENVIRONMENT_EXPORT_KEY: "PARNODES",
+                            _WE.ENVIRONMENT_EXPORT_VALUE: "6",
+                        },
+                    ]
+                },
+                _WE.GLOBAL_VARIABLES: {
+                    "smiles": "3,4-DIAMINOBENZOTRIFLUORIDE:Nc1ccc(cc1N)C(F)(F)F;aspirin:O=C(C)Oc1ccccc1C(=O)O"
+                },
+            },
+            _WE.STEPS: [
+                {
+                    _SBE.STEPID: "initialization_smile",
+                    _SBE.STEP_TYPE: "initialization",
+                    _SBE.INPUT: {
+                        _SBE.INPUT_COMPOUNDS: [
+                            {
+                                _SBE.INPUT_SOURCE: "{smiles}",
+                                _SBE.INPUT_SOURCE_TYPE: "string",
+                            }
+                        ]
+                    },
+                },
+                {
+                    _SBE.STEPID: "Ligprep",
+                    _SBE.STEP_TYPE: "ligprep",
+                    _SBE.EXEC: {
+                        _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws",
+                        _SBE.EXEC_PARALLELIZATION: {
+                            _SBE.EXEC_PARALLELIZATION_CORES: 4,
+                            _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1,
+                        },
+                        _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 3},
+                    },
+                    _SBE.SETTINGS: {
+                        _SBE.SETTINGS_ARGUMENTS: {
+                            _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-epik"],
+                            _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {
+                                "-ph": 7.0,
+                                "-pht": 1.0,
+                                "-s": 1,
+                                "-bff": 14,
+                            },
+                        },
+                        _SBE.SETTINGS_ADDITIONAL: {},
+                    },
+                    _SBE.INPUT: {
+                        _SBE.INPUT_COMPOUNDS: [
+                            {
+                                _SBE.INPUT_SOURCE: "initialization_smile",
+                                _SBE.INPUT_SOURCE_TYPE: "step",
+                            }
+                        ]
+                    },
+                    _SBE.WRITEOUT: [
+                        {
+                            _SBE.INPUT_COMPOUNDS: {
+                                _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_ENUMERATIONS
+                            },
+                            _SBE.WRITEOUT_DESTINATION: {
+                                _SBE.WRITEOUT_DESTINATION_RESOURCE: f"{self._test_dir}/nibr_ligprep.sdf",
+                                _SBE.STEP_TYPE: "file",
+                                _SBE.WRITEOUT_DESTINATION_FORMAT: "SDF",
+                            },
+                        }
+                    ],
+                },
+                {
+                    _SBE.STEPID: "Glide",
+                    _SBE.STEP_TYPE: "glide",
+                    _SBE.EXEC: {
+                        _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws",
+                        _SBE.EXEC_PARALLELIZATION: {
+                            _SBE.EXEC_PARALLELIZATION_CORES: 8,
+                            _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1,
+                        },
+                        _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 3},
+                    },
+                    _SBE.SETTINGS: {
+                        _SBE.SETTINGS_ARGUMENTS: {
+                            _SBE.SETTINGS_ARGUMENTS_FLAGS: [],
+                            _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-HOST": "localhost"},
+                        },
+                        _SBE.SETTINGS_ADDITIONAL: {
+                            "configuration": {
+                                "AMIDE_MODE": "trans",
+                                "EXPANDED_SAMPLING": "True",
+                                "GRIDFILE": [PATHS_1UYD.GRID_PATH],
+                                "NENHANCED_SAMPLING": "1",
+                                "POSE_OUTTYPE": "ligandlib_sd",
+                                "POSES_PER_LIG": "15",
+                                "POSTDOCK_NPOSE": "25",
+                                "POSTDOCKSTRAIN": "True",
+                                "PRECISION": "SP",
+                                "REWARD_INTRA_HBONDS": "True",
+                            }
+                        },
+                    },
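+                    # Glide docks the enumerations produced by the Ligprep step
+                    # (declared as input below); its conformers feed the ShaEP step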
_SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "Ligprep", + _SBE.INPUT_SOURCE_TYPE: "step", + } + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: "conformers" + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: f"{self._test_dir}/tests/junk/nibr_glide.sdf", + _SBE.WRITEOUT_DESTINATION_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "SDF", + }, + } + ], + }, + { + _SBE.STEPID: "Shaep", + _SBE.STEP_TYPE: "shaep", + _SBE.EXEC: {_SBE.EXEC_BINARYLOCATION: "/projects/cc/mai/binaries"}, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: PATHS_EXAMPLEDATA.PANTHER_NEGATIVE_IMAGE, + _SBE.INPUT_EXTENSION: "mol2", + } + ], + _SBE.INPUT_COMPOUNDS: [ + {_SBE.INPUT_SOURCE: "Glide", _SBE.INPUT_SOURCE_TYPE: "step"} + ], + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: "conformers", + _SBE.WRITEOUT_COMP_SELECTED_TAGS: [ + "shape_similarity", + "esp_similarity", + ], + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: f"{self._test_dir}/tests/junk/nibr_final_all.csv", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "CSV", + }, + }, + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: "conformers", + _SBE.WRITEOUT_COMP_SELECTED_TAGS: [ + "shape_similarity", + "esp_similarity", + ], + _SBE.WRITEOUT_COMP_AGGREGATION: { + _SBE.WRITEOUT_COMP_AGGREGATION_MODE: "best_per_compound", + _WE.ENVIRONMENT_EXPORT_KEY: "shape_similarity", + _SBE.WRITEOUT_COMP_AGGREGATION_HIGHESTISBEST: True, + }, + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: f"{self._test_dir}/nibr_final_bestpercompound.csv", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "CSV", + }, + }, + ], + }, + ], + } + + wflow = WorkFlow(**conf) + wflow.initialize() + + self.assertEqual(len(wflow.steps), 4) + + wflow.execute() + + out_path = os.path.join(self._test_dir, "nibr_final_bestpercompound.csv") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 110) diff --git a/tests/integration_tests/test_fep_plus.py b/tests/integration_tests/test_fep_plus.py new file mode 100644 index 0000000..fd695db --- /dev/null +++ b/tests/integration_tests/test_fep_plus.py @@ -0,0 +1,224 @@ +import unittest +import os +from tests.tests_paths import PATHS_EXAMPLEDATA +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.composite_agents_enums import WorkflowEnum +from icolos.utils.enums.step_enums import StepBaseEnum, StepGlideEnum, TokenGuardEnum + +_WE = WorkflowEnum() +_SBE = StepBaseEnum +_SGE = StepGlideEnum() +_TE = TokenGuardEnum() + + +class TestFEPPlusWorkflow(unittest.TestCase): + @classmethod + def setUpClass(cls): + + cls._test_dir = attach_root_path("tests/junk/integration") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + @classmethod + def tearDownClass(cls): + pass + + def test_fep_plus_workflow(self): + + conf = { + _WE.HEADER: { + _WE.ID: "Docking/FEP+ combined workflow", + _WE.DESCRIPTION: "test setup for FEP+ integration", + _WE.ENVIRONMENT: {_WE.ENVIRONMENT_EXPORT: []}, + _WE.GLOBAL_VARIABLES: { + "smiles": "3,4-DIAMINOBENZOTRIFLUORIDE:Nc1ccc(cc1N)C(F)(F)F" + }, + }, + _WE.STEPS: [ + { + _SBE.STEPID: "initialization_smile", + _SBE.STEP_TYPE: "initialization", + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "{smiles}", + _SBE.INPUT_SOURCE_TYPE: "string", + } + ] + }, + }, + { + _SBE.STEPID: 
"Ligprep", + _SBE.STEP_TYPE: "ligprep", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws", + "parallelization": {"cores": 2, "max_length_sublists": 1}, + "failure_policy": {"n_tries": 3}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-epik"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-ph": 7.0, + "-pht": 2.0, + "-s": 10, + "-bff": 14, + "-HOST": "localhost", + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + "filter_file": {"Total_charge": "!= 0"} + }, + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "initialization_smile", + _SBE.INPUT_SOURCE_TYPE: "step", + } + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_ENUMERATIONS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{entrypoint_dir}/ligprep_enums.sdf", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "SDF", + }, + } + ], + }, + { + _SBE.STEPID: "Glide", + _SBE.STEP_TYPE: "glide", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 4, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 3}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-HOST": "localhost"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "configuration": { + "AMIDE_MODE": "trans", + "EXPANDED_SAMPLING": "True", + "GRIDFILE": [PATHS_EXAMPLEDATA.PRIME_COX2_GRID], + "NENHANCED_SAMPLING": "1", + "POSE_OUTTYPE": "poseviewer", + "POSES_PER_LIG": "1", + "POSTDOCK_NPOSE": "25", + "POSTDOCKSTRAIN": "True", + "PRECISION": "SP", + "REWARD_INTRA_HBONDS": "True", + } + }, + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "Ligprep", + _SBE.INPUT_SOURCE_TYPE: "step", + } + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_COMPOUNDS: {"category": "conformers"}, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/docked_conformers_cox2_actives.sdf", + _SBE.STEP_TYPE: "file", + "format": "SDF", + }, + } + ], + }, + { + _SBE.STEPID: "FEP_plus_setup", + _SBE.STEP_TYPE: "fep_plus_setup", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws" + }, + _SBE.SETTINGS: {}, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "Glide", + _SBE.INPUT_SOURCE_TYPE: "step", + "target_field": _SBE.INPUT_COMPOUNDS, + } + ] + }, + }, + { + _SBE.STEPID: "FEP_plus_exec", + _SBE.STEP_TYPE: "fep_plus_exec", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws" + }, + _SBE.TOKEN_GUARD: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws", + _SBE.EXEC_BINARYLOCATION: "ssh 10.220.1.4 /opt/schrodinger/suite/installations/default", + _TE.TG_TOKEN_POOLS: {"FEP_GPGPU": 16}, + "wait_interval_seconds": 30, + "wait_limit_seconds": 0, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-JOBNAME": "test", + "-HOST": "fep-compute", + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "Glide", + _SBE.INPUT_SOURCE_TYPE: "step", + "target_field": _SBE.INPUT_COMPOUNDS, + } + ], + "generic": [ + {_SBE.INPUT_SOURCE: "FEP_plus_setup", "extension": "fmp"} + ], + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS, + 
_SBE.WRITEOUT_COMP_SELECTED_TAGS: [ + "dG", + "docking_score", + ], + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION: os.path.join( + self._test_dir, "fep_scored_conformers.csv" + ), + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "CSV", + }, + } + ], + }, + ], + } + + wflow = WorkFlow(**conf) + wflow.initialize() + wflow.execute() + + out_path = os.path.join(self._test_dir, "fep_scored_conformers.csv") + stat_inf = os.stat(out_path) + self.assertGreaterEqual(stat_inf.st_size, 4252) diff --git a/tests/integration_tests/test_gromacs.py b/tests/integration_tests/test_gromacs.py new file mode 100644 index 0000000..aae0171 --- /dev/null +++ b/tests/integration_tests/test_gromacs.py @@ -0,0 +1,1076 @@ +import unittest +import os +from tests.tests_paths import MAIN_CONFIG, PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.composite_agents_enums import WorkflowEnum +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum + +_WE = WorkflowEnum() +_SBE = StepBaseEnum +_SGE = StepGromacsEnum() + + +class Test_MD_Fpocket(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/integration") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def test_workflow_MD_fpocket_holo(self): + """ + run a full gromacs MD workflow from a pdb structure, then fpocket on the resulting trajectory + MDPocket is run on the holo structure + """ + + conf = { + _WE.HEADER: { + _WE.ID: "gromacs_test", + _WE.DESCRIPTION: "full md run with gromacs, with MDpocket run to extract descriptors for binding pocket", + _WE.ENVIRONMENT: { + _WE.ENVIRONMENT_EXPORT: [ + { + _WE.ENVIRONMENT_EXPORT_KEY: "GMX_GPU_DD_COMMS", + _WE.ENVIRONMENT_EXPORT_VALUE: "True", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "GMX_GPU_PME_PP_COMMS", + _WE.ENVIRONMENT_EXPORT_VALUE: "True", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "GMX_FORCE_UPDATE_DEFAULT_GPU", + _WE.ENVIRONMENT_EXPORT_VALUE: "True", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "GMXLIB", + _WE.ENVIRONMENT_EXPORT_VALUE: "/forcefields/", + }, + ] + }, + _WE.GLOBAL_VARIABLES: { + "root_dir": "/icolos", + "file_base": os.path.join( + MAIN_CONFIG["ICOLOS_TEST_DATA"], "gromacs/protein" + ), + "output_dir": attach_root_path("tests/junk/integration"), + }, + }, + _WE.STEPS: [ + { + _SBE.STEPID: "01_pdb2gmx", + _SBE.STEP_TYPE: "pdb2gmx", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-ignh"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-water": "tip4p", + "-ff": "amber14sb_OL15", + }, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: attach_root_path( + PATHS_EXAMPLEDATA.MDPOCKET_PDB_FILE_DRY + ), + _SBE.INPUT_EXTENSION: "pdb", + } + ] + }, + }, + { + _SBE.STEPID: "02_editconf", + _SBE.STEP_TYPE: "editconf", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-d": "1.5", + "-bt": "dodecahedron", + }, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "gro", + } + ] + }, + }, + { + _SBE.STEPID: "03_solvate", + 
_SBE.STEP_TYPE: "solvate", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-cs": "tip4p"}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "02_editconf", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "top", + }, + ] + }, + }, + { + _SBE.STEPID: "04_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": False, + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "03_solvate", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/ions.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "03_solvate", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "05_genion", + _SBE.STEP_TYPE: "genion", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-neutral"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-pname": "NA", + "-nname": "CL", + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + "pipe_input": "SOL", + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "04_grompp", + _SBE.INPUT_EXTENSION: "tpr", + }, + { + _SBE.INPUT_SOURCE: "04_grompp", + _SBE.INPUT_EXTENSION: "top", + }, + ] + }, + }, + { + _SBE.STEPID: "06_grompp_eminim", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": False, + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/minim.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "07_eminim_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "06_grompp_eminim", + _SBE.INPUT_EXTENSION: "tpr", + } + ] + }, + }, + { + _SBE.STEPID: "08_nvt_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-n": "index.ndx"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": True, + "make_ndx_command": "auto", + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "07_eminim_mdrun", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "top", + }, + { + 
_SBE.INPUT_SOURCE: "{file_base}/nvt_equil.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "09_nvt_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "08_nvt_grompp", + _SBE.INPUT_EXTENSION: "tpr", + } + ] + }, + }, + { + _SBE.STEPID: "10_npt_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-n": "index.ndx"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": True, + "make_ndx_command": "auto", + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "09_nvt_mdrun", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/npt_equil.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "11_npt_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "10_npt_grompp", + _SBE.INPUT_EXTENSION: "tpr", + } + ] + }, + }, + { + _SBE.STEPID: "12_prod_md_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-n": "index.ndx"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": False, + "make_ndx_command": "auto", + "fields": {"nsteps": "5000"}, + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "11_npt_mdrun", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/md.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "13_prod_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-nb": "gpu", + "-bonded": "gpu", + "-pme": "gpu", + "-c": "structure.pdb", + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "12_prod_md_grompp", + _SBE.INPUT_EXTENSION: "tpr", + } + ] + }, + "writeout": [ + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "xtc"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1.xtc", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + }, + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "log"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1.log", + 
_SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + }, + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "gro"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1.gro", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + }, + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "tpr"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1.tpr", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + }, + ], + }, + { + _SBE.STEPID: "14_trjconv", + _SBE.STEP_TYPE: "trjconv", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-center"] + }, + _SBE.SETTINGS_ADDITIONAL: {"pipe_input": "echo -ne 1 0"}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "13_prod_mdrun", + _SBE.INPUT_EXTENSION: "xtc", + }, + { + _SBE.INPUT_SOURCE: "13_prod_mdrun", + _SBE.INPUT_EXTENSION: "tpr", + }, + ] + }, + "writeout": [ + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "xtc"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1_trjconv.xtc", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + } + ], + }, + { + _SBE.STEPID: "15_MDpocket", + _SBE.STEP_TYPE: "mdpocket", + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load fpocket"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {} + }, + _SBE.SETTINGS_ADDITIONAL: {"format": "gromacs"}, + _SBE.SETTINGS_ADDITIONAL: {"format": "gromacs"}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "14_trjconv", + _SBE.INPUT_EXTENSION: "xtc", + }, + { + _SBE.INPUT_SOURCE: "13_prod_mdrun", + _SBE.INPUT_EXTENSION: "pdb", + }, + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.WRITEOUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "pdb"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + _SBE.WRITEOUT_DESTINATION_MODE: "dir", + }, + } + ], + }, + ], + } + export_unit_test_env_vars() + wflow = WorkFlow(**conf) + wflow.initialize() + + self.assertEqual(len(wflow.steps), 15) + + wflow.execute() + + out_path = os.path.join(self._test_dir, "md_0_1_0.xtc") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 62400) + + def test_md_ligparam(self): + + conf = { + _WE.HEADER: { + _WE.ID: "gromacs_test_ligparam", + _WE.DESCRIPTION: "full md run with gromacs, with ligand parametrisation", + _WE.ENVIRONMENT: { + _WE.ENVIRONMENT_EXPORT: [ + { + _WE.ENVIRONMENT_EXPORT_KEY: "GMX_GPU_DD_COMMS", + _WE.ENVIRONMENT_EXPORT_VALUE: "True", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "GMX_GPU_PME_PP_COMMS", + _WE.ENVIRONMENT_EXPORT_VALUE: "True", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "GMX_FORCE_UPDATE_DEFAULT_GPU", + _WE.ENVIRONMENT_EXPORT_VALUE: "True", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "ACPYPE", + _WE.ENVIRONMENT_EXPORT_VALUE: "/projects/cc/mai/binaries/acpype", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "GMXLIB", + _WE.ENVIRONMENT_EXPORT_VALUE: "/gmx_workflow/forcefields/", + }, + ] + }, + _WE.GLOBAL_VARIABLES: { + "forcefield": "/gmx_workflow/forcefields/amber14sb_OL15.ff", + "output_dir": attach_root_path("tests/junk/integration"), + "file_base": PATHS_EXAMPLEDATA.GROMACS_PROTEIN_FILE_BASE, + }, + }, + _WE.STEPS: [ + { + _SBE.STEPID: "01_pdb2gmx", + _SBE.STEP_TYPE: "pdb2gmx", + _SBE.EXEC: { + 
_SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-ignh"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-water": "tip4p", + "-ff": "amber14sb_OL15", + }, + }, + _SBE.SETTINGS_ADDITIONAL: {"forcefield": "{forcefield}"}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: attach_root_path( + PATHS_EXAMPLEDATA.GROMACS_HOLO_STRUCTURE + ), + _SBE.INPUT_EXTENSION: "pdb", + } + ] + }, + }, + { + _SBE.STEPID: "02_editconf", + _SBE.STEP_TYPE: "editconf", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-d": "1.5", + "-bt": "dodecahedron", + }, + }, + _SBE.SETTINGS_ADDITIONAL: {"forcefield": "{forcefield}"}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "gro", + } + ] + }, + }, + { + _SBE.STEPID: "03_solvate", + _SBE.STEP_TYPE: "solvate", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-cs": "tip4p"}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "02_editconf", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "top", + }, + ] + }, + }, + { + _SBE.STEPID: "04_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": False, + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "03_solvate", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/ions.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "03_solvate", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "05_genion", + _SBE.STEP_TYPE: "genion", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-neutral"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-pname": "NA", + "-nname": "CL", + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + "pipe_input": "SOL", + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "04_grompp", + _SBE.INPUT_EXTENSION: "tpr", + }, + { + _SBE.INPUT_SOURCE: "04_grompp", + _SBE.INPUT_EXTENSION: "top", + }, + ] + }, + }, + { + _SBE.STEPID: "06_grompp_eminim", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": False, + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/minim.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + 
_SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "07_eminim_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "06_grompp_eminim", + _SBE.INPUT_EXTENSION: "tpr", + } + ] + }, + }, + { + _SBE.STEPID: "08_nvt_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-n": "index.ndx"}, + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-n": "index.ndx"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": True, + "make_ndx_command": "auto", + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "07_eminim_mdrun", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/nvt_equil.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "09_nvt_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "08_nvt_grompp", + _SBE.INPUT_EXTENSION: "tpr", + } + ] + }, + }, + { + _SBE.STEPID: "10_npt_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-n": "index.ndx"}, + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-n": "index.ndx"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": True, + "make_ndx_command": "auto", + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "09_nvt_mdrun", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/npt_equil.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "11_npt_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "10_npt_grompp", + _SBE.INPUT_EXTENSION: "tpr", + } + ] + }, + }, + { + _SBE.STEPID: "12_prod_md_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-n": "index.ndx", + }, + }, + _SBE.SETTINGS_ADDITIONAL: { 
+ "-r": False, + "fields": {"nsteps": "5000"}, + "make_ndx_command": "auto", + "fields": {"nsteps": "5000"}, + "make_ndx_command": "auto", + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "11_npt_mdrun", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/md.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "13_prod_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-nb": "gpu", + "-bonded": "gpu", + "-pme": "gpu", + "-c": "structure.pdb", + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "12_prod_md_grompp", + _SBE.INPUT_EXTENSION: "tpr", + } + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "xtc"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1.xtc", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + }, + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "log"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1.log", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + }, + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "gro"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1.gro", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + }, + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "tpr"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1.tpr", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + }, + ], + }, + { + _SBE.STEPID: "14_trjconv", + _SBE.STEP_TYPE: "trjconv", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-center"] + }, + _SBE.SETTINGS_ADDITIONAL: {"pipe_input": "echo -ne 1 0"}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "13_prod_mdrun", + _SBE.INPUT_EXTENSION: "xtc", + }, + { + _SBE.INPUT_SOURCE: "13_prod_mdrun", + _SBE.INPUT_EXTENSION: "tpr", + }, + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "xtc"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1_trjconv.xtc", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + } + ], + }, + ], + } + + export_unit_test_env_vars() + wflow = WorkFlow(**conf) + wflow.initialize() + + self.assertEqual(len(wflow.steps), 14) + + wflow.execute() + + out_path = os.path.join(self._test_dir, "md_0_1_0.xtc") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 324000) diff --git a/tests/integration_tests/test_rmsd_iter.py b/tests/integration_tests/test_rmsd_iter.py new file mode 100644 index 0000000..6556e3e --- /dev/null +++ b/tests/integration_tests/test_rmsd_iter.py @@ -0,0 +1,344 @@ +import unittest +import os +from tests.tests_paths import PATHS_1UYD +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.composite_agents_enums import 
WorkflowEnum +from icolos.utils.enums.step_enums import StepBaseEnum, StepGlideEnum + + +_WE = WorkflowEnum() +_SBE = StepBaseEnum +_SGE = StepGlideEnum() + + +class TestDockingWorkflow(unittest.TestCase): + @classmethod + def setUpClass(cls): + + cls._test_dir = attach_root_path("tests/junk/integration") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + @classmethod + def tearDownClass(cls): + pass + + def test_iterator_workflow(self): + """ + Runs the RMSD-corrected docking workflow using multiple xtb settings in parallel + """ + + conf = { + _WE.HEADER: { + _WE.ID: "RMSD_rescoring", + _WE.DESCRIPTION: "Run RMSD rescoring on docking pose", + _WE.ENVIRONMENT: { + _WE.ENVIRONMENT_EXPORT: [ + { + _WE.ENVIRONMENT_EXPORT_KEY: "OE_LICENSE", + _WE.ENVIRONMENT_EXPORT_VALUE: "/opt/scp/software/oelicense/1.0/oe_license.seq1", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "XTBHOME", + _WE.ENVIRONMENT_EXPORT_VALUE: "/projects/cc/mai/binaries/xtb-6.4.0", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "XTBPATH", + _WE.ENVIRONMENT_EXPORT_VALUE: "${XTBHOME}/share/xtb", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "PATH", + _WE.ENVIRONMENT_EXPORT_VALUE: "${PATH}:${XTBHOME}/bin", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "PKG_CONFIG_PATH", + _WE.ENVIRONMENT_EXPORT_VALUE: "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "PARA_ARCH", + _WE.ENVIRONMENT_EXPORT_VALUE: "MPI", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "PARNODES", + _WE.ENVIRONMENT_EXPORT_VALUE: "6", + }, + ] + }, + _WE.GLOBAL_VARIABLES: { + "smiles": "3,4-DIAMINOBENZOTRIFLUORIDE:Nc1ccc(cc1N)C(F)(F)F" + }, + }, + _WE.STEPS: [ + { + _SBE.STEPID: "initialization_smile", + _SBE.STEP_TYPE: "initialization", + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "{smiles}", + _SBE.INPUT_SOURCE_TYPE: "string", + } + ] + }, + }, + { + _SBE.STEPID: "Ligprep", + _SBE.STEP_TYPE: "ligprep", + _SBE.EXEC: { + "prefix_execution": "module load schrodinger/2020-4", + "parallelization": {"cores": 2, "max_length_sublists": 1}, + "failure_policy": {"n_tries": 3}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-epik"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-ph": 7.0, + "-pht": 2.0, + "-s": 10, + "-bff": 14, + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + "filter_file": {"Total_charge": "!= 0"} + }, + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "initialization_smile", + _SBE.INPUT_SOURCE_TYPE: "step", + } + ] + }, + }, + { + _SBE.STEPID: "Glide", + _SBE.STEP_TYPE: "glide", + _SBE.EXEC: { + "prefix_execution": "module load schrodinger/2020-4", + "parallelization": {"cores": 4, "max_length_sublists": 1}, + "failure_policy": {"n_tries": 3}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "configuration": { + "AMIDE_MODE": "trans", + "EXPANDED_SAMPLING": "True", + "GRIDFILE": [PATHS_1UYD.GRID_PATH], + "NENHANCED_SAMPLING": "1", + "POSE_OUTTYPE": "ligandlib_sd", + "POSES_PER_LIG": "3", + "POSTDOCK_NPOSE": "25", + "POSTDOCKSTRAIN": "True", + "PRECISION": "SP", + "REWARD_INTRA_HBONDS": "True", + } + }, + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "Ligprep", + _SBE.INPUT_SOURCE_TYPE: "step", + } + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: "conformers" + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: 
"tests/junk/integration/rmsd_rescoring_docked_conformers.sdf", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "SDF", + }, + }, + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: "conformers", + _SBE.WRITEOUT_COMP_SELECTED_TAGS: [ + "docking_score", + "grid_id", + ], + _SBE.WRITEOUT_COMP_AGGREGATION: { + _SBE.WRITEOUT_COMP_AGGREGATION_MODE: "best_per_compound", + _WE.ENVIRONMENT_EXPORT_KEY: "docking_score", + _SBE.WRITEOUT_COMP_AGGREGATION_HIGHESTISBEST: False, + }, + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "tests/junk/integration/rmsd_rescoring_docked_conformers.csv", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "CSV", + }, + }, + ], + }, + { + _SBE.STEPID: "compound_filter", + _SBE.STEP_TYPE: "filter", + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: { + "filter_level": _SBE.INPUT_COMPOUNDS, + "criteria": "docking_score", + "return_n": 1, + "highest_is_best": False, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + {_SBE.INPUT_SOURCE: "Glide", _SBE.INPUT_SOURCE_TYPE: "step"} + ] + }, + }, + { + _SBE.STEPID: "test_iterator", + _SBE.STEP_TYPE: "iterator", + "base_config": [ + { + _SBE.STEPID: "xtb", + _SBE.STEP_TYPE: "xtb", + _SBE.EXEC: { + "binary_location": "/projects/cc/mai/binaries/xtb-6.4.0", + "parallelization": {"cores": 4}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "--gbsa": "h2o" + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "compound_filter", + _SBE.INPUT_SOURCE_TYPE: "step", + } + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: "conformers" + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "tests/junk/rmsd_rescoring_xtb.sdf", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "SDF", + }, + } + ], + }, + { + _SBE.STEPID: "data_manipulation", + _SBE.STEP_TYPE: "data_manipulation", + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: { + "action": "attach_conformers_as_extra", + _SBE.INPUT_SOURCE: "xtb", + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "compound_filter", + _SBE.INPUT_SOURCE_TYPE: "step", + } + ] + }, + }, + { + _SBE.STEPID: "rmsd", + _SBE.STEP_TYPE: "rmsd", + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: {"method": "alignmol"} + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "data_manipulation", + _SBE.INPUT_SOURCE_TYPE: "step", + } + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: "conformers" + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "tests/junk/integration/rmsd_rescoring.sdf", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "SDF", + }, + }, + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: "conformers", + _SBE.WRITEOUT_COMP_SELECTED_TAGS: [ + "docking_score", + "rmsd", + "grid_id", + ], + _SBE.WRITEOUT_COMP_AGGREGATION: { + _SBE.WRITEOUT_COMP_AGGREGATION_MODE: "best_per_compound", + _WE.ENVIRONMENT_EXPORT_KEY: "docking_score", + }, + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "tests/junk/integration/rmsd_rescoring.csv", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "CSV", + }, + }, + ], + }, + ], + "iter_settings": { + _SBE.SETTINGS: { + "xtb": { + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "--opt": [ + "vtight", + "vtight", + "vtight", + "vtight", + "vtight", + "vtight", + "vtight", + "tight", + ] + } + } + }, + 
"n_iters": 8, + "iter_mode": "single", + "parallelizer_settings": { + "parallelize": True, + "cores": 8, + "max_length_sublists": 3, + }, + }, + }, + ], + } + + wflow = WorkFlow(**conf) + wflow.initialize() + + self.assertEqual(len(wflow.steps), 5) + + wflow.execute() + + out_path = os.path.join(self._test_dir, "run_0/rmsd_rescoring.csv") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 82) diff --git a/tests/io/__init__.py b/tests/io/__init__.py new file mode 100644 index 0000000..123d3ff --- /dev/null +++ b/tests/io/__init__.py @@ -0,0 +1,3 @@ +from tests.io.test_initialize_compound import * +from tests.io.test_embedder import * +from tests.io.test_data_manipulation import Test_DataManipulation diff --git a/tests/io/test_data_manipulation.py b/tests/io/test_data_manipulation.py new file mode 100644 index 0000000..905ace1 --- /dev/null +++ b/tests/io/test_data_manipulation.py @@ -0,0 +1,325 @@ +import unittest +from copy import deepcopy + +from rdkit.Geometry.rdGeometry import Point3D + +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.core.containers.compound import Compound, Conformer, Enumeration +from tests.tests_paths import PATHS_1UYD +from icolos.core.containers.generic import GenericData +from icolos.core.workflow_steps.io.data_manipulation import StepDataManipulation +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.composite_agents_enums import WorkflowEnum +from rdkit.Chem import SDMolSupplier + +from icolos.utils.enums.step_enums import ( + StepBaseEnum, + StepDataManipulationEnum, + StepFilterEnum, +) +from icolos.utils.general.files_paths import attach_root_path, empty_output_dir +import os +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + export_unit_test_env_vars, + get_ligands_as_compounds_with_conformers, + get_mol_as_Conformer, +) + +_SBE = StepBaseEnum +_SDM = StepDataManipulationEnum() +_WE = WorkflowEnum() +_SFE = StepFilterEnum() + + +class Test_DataManipulation(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/data_manip") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def _get_step_filter_compounds(self): + # produce the compounds object for testing + # return 5 dummy compounds with 1 enumeration and 5 conformers per enumeration + mols = SDMolSupplier(attach_root_path(PATHS_1UYD.LIGANDS)) + mol = mols[0] + compounds = [] + for i in range(5): + compound = Compound(name=str(i), compound_number=i) + enum = Enumeration() + for i in range(5): + conf = Conformer(conformer=mol, conformer_id=i, enumeration_object=enum) + enum.add_conformer(conformer=conf) + compound.add_enumeration(enum) + compounds.append(compound) + return compounds + + def setUp(self): + self._compounds = self._get_step_filter_compounds() + # dG score gets gradually worse, prime gets gradually worse during the conformer walk + dG_value = -13 + prime_value = -2900 + for compound in self._compounds: + for enum in compound.get_enumerations(): + for conformer in enum.get_conformers(): + conformer.get_molecule().SetProp("dG", str(dG_value)) + conformer.get_molecule().SetProp( + "r_psp_MMGBSA_dG_Bind", str(prime_value) + ) + dG_value += 0.2 + prime_value -= 10 + # remove files from previous runs + empty_output_dir(self._test_dir) + + with open(PATHS_EXAMPLEDATA.PRIME_RECEPTOR_COX2, "r") as f: + data = f.read() + self.complex_conformers = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.LIGANDS_1UYD + ) + 
self.mae_file = GenericData(file_name="structure.mae", file_data=data) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + + # Compound 1 with 1 enumeration and 11 conformers + self.comp1 = Compound(compound_number=1) + self.comp1.add_enumeration(Enumeration(), auto_update=True) + self.comp1[0].add_conformers(deepcopy(conformers), auto_update=True) + + # Compound 2 with 1 enumeration and 11 conformers, change of some coordinates + self.comp2 = Compound(compound_number=1) + self.comp2.add_enumeration(Enumeration(), auto_update=True) + self.comp2[0].add_conformers(deepcopy(conformers), auto_update=True) + self.comp2[0][1].get_molecule().GetConformer().SetAtomPosition( + 0, Point3D(-4.2239, -0.441, 0.2458) + ) + self.comp2[0][7].get_molecule().GetConformer().SetAtomPosition( + 0, Point3D(-1.5442, -0.7854, 0.5883) + ) + + # workflow (necessary to pass on data) + conf = { + _WE.HEADER: {_WE.ID: "test_workflow", _WE.ENVIRONMENT: {}}, + _WE.STEPS: [], + } + self.workflow = WorkFlow(**conf) + + # dummy step + step_conf = { + _SBE.STEPID: "01_dummy", + _SBE.STEP_TYPE: "dummy", + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + } + dummy_step = StepBase(**step_conf) + dummy_step.get_compounds().append(self.comp2) + dummy_step.set_workflow_object(self.workflow) + self.workflow.add_step(dummy_step) + + @classmethod + def tearDownClass(cls): + pass + + def test_matching(self): + step_conf = { + _SBE.STEPID: "01_data_manip", + _SBE.STEP_TYPE: _SBE.STEP_DATA_MANIPULATION, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SDM.ACTION: _SDM.ACTION_ATTACH_CONFORMERS_AS_EXTRA + }, + }, + } + manip_step = StepDataManipulation(**step_conf) + manip_step.get_compounds().append(self.comp1) + manip_step.set_workflow_object(self.workflow) + self.workflow.add_step(manip_step) + + manip_step.settings.additional[_SDM.MATCH_SOURCE] = "01_dummy" + manip_step.execute() + + self.assertEqual( + manip_step.get_compounds()[0][0][2] + .get_extra_data()[_SDM.KEY_MATCHED][0] + .get_index_string(), + "1:0:2", + ) + + def test_no_action(self): + step_conf = { + _SBE.STEPID: "01_data_manip", + _SBE.STEP_TYPE: _SBE.STEP_DATA_MANIPULATION, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {_SDM.ACTION: _SDM.ACTION_NO_ACTION}, + }, + } + manip_step = StepDataManipulation(**step_conf) + manip_step.get_compounds().append(self.comp1) + manip_step.set_workflow_object(self.workflow) + self.workflow.add_step(manip_step) + + manip_step.settings.additional[_SDM.MATCH_SOURCE] = "01_dummy" + manip_step.execute() + + self.assertEqual(len(manip_step.get_compounds()[0][0]), 11) + + def test_convert_mae2pdb(self): + step_conf = { + _SBE.STEPID: "01_data_manip", + _SBE.STEP_TYPE: _SBE.STEP_DATA_MANIPULATION, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {_SDM.ACTION: _SDM.CONVERT_MAE_TO_PDB}, + }, + } + manip_step = StepDataManipulation(**step_conf) + manip_step.set_workflow_object(self.workflow) + manip_step.data.generic.add_file(self.mae_file) + self.workflow.add_step(manip_step) + + manip_step.execute() + out_path = os.path.join(self._test_dir, "structure.pdb") + 
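+        # flush the step's generic files with the given extension to disk; the
+        # size floor below guards against truncated output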
+        manip_step.write_generic_by_extension(self._test_dir, "pdb")
+        stat_inf = os.stat(out_path)
+        self.assertGreater(stat_inf.st_size, 734400)
+
+    def test_get_complexes(self):
+        step_conf = {
+            _SBE.STEPID: "01_data_manip",
+            _SBE.STEP_TYPE: _SBE.STEP_DATA_MANIPULATION,
+            _SBE.SETTINGS: {
+                _SBE.SETTINGS_ARGUMENTS: {
+                    _SBE.SETTINGS_ARGUMENTS_FLAGS: [],
+                    _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {},
+                },
+                _SBE.SETTINGS_ADDITIONAL: {
+                    _SDM.ACTION: _SDM.ASSEMBLE_COMPLEXES,
+                    _SDM.RECEPTOR: PATHS_EXAMPLEDATA.RECEPTOR_1UYD,
+                },
+            },
+        }
+        manip_step = StepDataManipulation(**step_conf)
+        manip_step.data.compounds = self.complex_conformers
+        manip_step.set_workflow_object(self.workflow)
+        self.workflow.add_step(manip_step)
+
+        manip_step.execute()
+        out_path = os.path.join(self._test_dir, "0:0:0.pdb")
+        manip_step.write_generic_by_extension(self._test_dir, "pdb")
+        stat_inf = os.stat(out_path)
+        self.assertGreater(stat_inf.st_size, 509600)
+
+    def test_filtering(self):
+        step_conf = {
+            _SBE.STEPID: "01_filtering",
+            _SBE.STEP_TYPE: _SBE.STEP_DATA_MANIPULATION,
+            _SBE.SETTINGS: {
+                _SBE.SETTINGS_ADDITIONAL: {
+                    _SDM.ACTION: _SDM.FILTER,
+                    _SFE.FILTER_LEVEL: "enumerations",
+                    _SFE.CRITERIA: "dG",
+                    _SFE.RETURN_N: 3,
+                    _SFE.HIGHEST_IS_BEST: False,
+                }
+            },
+        }
+
+        step_filter = StepDataManipulation(**step_conf)
+        step_filter.data.compounds = self._compounds
+
+        step_filter.execute()
+        dG_max = (
+            step_filter.data.compounds[0]
+            .get_enumerations()[0]
+            .get_conformers()[0]
+            .get_molecule()
+            .GetProp("dG")
+        )
+        step_filter.write_conformers(
+            path=os.path.join(self._test_dir, "filtered_confs.sdf")
+        )
+        out_path = os.path.join(self._test_dir, "filtered_confs.sdf")
+        stat_inf = os.stat(out_path)
+        self.assertEqual(stat_inf.st_size, 39708)
+        self.assertEqual(int(dG_max), -13)
+
+    def test_combined_filtering(self):
+        # filter based on a sum of two criteria attached to each conformer
+        step_conf = {
+            _SBE.STEPID: "01_filtering",
+            _SBE.STEP_TYPE: _SBE.STEP_DATA_MANIPULATION,
+            _SBE.SETTINGS: {
+                _SBE.SETTINGS_ADDITIONAL: {
+                    _SDM.ACTION: _SDM.FILTER,
+                    _SFE.FILTER_LEVEL: "enumerations",
+                    _SFE.CRITERIA: ["dG", "r_psp_MMGBSA_dG_Bind"],
+                    _SFE.RETURN_N: 3,
+                    _SFE.HIGHEST_IS_BEST: False,
+                    _SFE.AGGREGATION: "sum",
+                }
+            },
+        }
+
+        step_filter = StepDataManipulation(**step_conf)
+        step_filter.data.compounds = self._compounds
+        step_filter.execute()
+
+        dG_bind_max = (
+            step_filter.data.compounds[0]
+            .get_enumerations()[0]
+            .get_conformers()[0]
+            .get_molecule()
+            .GetProp("r_psp_MMGBSA_dG_Bind")
+        )
+        # check we can get single values back properly
+        self.assertEqual(int(dG_bind_max), -2900)
+        self.assertEqual(len(step_filter.data.compounds), 5)
+        self.assertEqual(len(step_filter.data.compounds[0][0].get_conformers()), 3)
+
+    def test_combined_filtering_compound_level(self):
+        # filter at the compound level
+        step_conf = {
+            _SBE.STEPID: "01_filtering",
+            _SBE.STEP_TYPE: _SBE.STEP_DATA_MANIPULATION,
+            _SBE.SETTINGS: {
+                _SBE.SETTINGS_ADDITIONAL: {
+                    _SDM.ACTION: _SDM.FILTER,
+                    _SFE.FILTER_LEVEL: "compounds",
+                    _SFE.CRITERIA: ["dG", "r_psp_MMGBSA_dG_Bind"],
+                    _SFE.RETURN_N: 3,
+                    _SFE.HIGHEST_IS_BEST: False,
+                    _SFE.AGGREGATION: "sum",
+                }
+            },
+        }
+
+        step_filter = StepDataManipulation(**step_conf)
+        step_filter.data.compounds = self._compounds
+        step_filter.execute()
+
+        dG_bind_max = (
+            step_filter.data.compounds[0]
+            .get_enumerations()[0]
+            .get_conformers()[0]
+            .get_molecule()
+            .GetProp("r_psp_MMGBSA_dG_Bind")
+        )
+        # check we can get single values back properly
+        self.assertEqual(int(dG_bind_max),
-2900) diff --git a/tests/io/test_embedder.py b/tests/io/test_embedder.py new file mode 100644 index 0000000..7ad60ae --- /dev/null +++ b/tests/io/test_embedder.py @@ -0,0 +1,135 @@ +import unittest + +from icolos.core.workflow_steps.io.embedder import StepEmbedding +from icolos.utils.enums.step_enums import StepBaseEnum, StepEmbeddingEnum + +from tests.tests_paths import PATHS_EXAMPLEDATA + +_SBE = StepBaseEnum +_SEE = StepEmbeddingEnum() + + +class Test_Embedder(unittest.TestCase): + @classmethod + def setUpClass(cls): + pass + + def setUp(self): + self._SMI_path = PATHS_EXAMPLEDATA.MEDIUM_MOLECULES_SMI_PATH + + @classmethod + def tearDownClass(cls): + pass + + def test_embed_with_RDkit_no_protonation(self): + step_conf = { + _SBE.STEPID: "01_embed_molecule", + _SBE.STEP_TYPE: _SBE.STEP_EMBEDDING, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _SEE.RDKIT_PROTONATE: False, + _SEE.METHOD: _SEE.METHOD_RDKIT, + } + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: self._SMI_path, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SMI, + } + ] + }, + } + init_step = StepEmbedding(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 2) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 0) + + self.assertListEqual( + list( + init_step.get_compounds()[0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.9314762660385534, 0.06628711293694872, 4.923008037397455], + ) + self.assertListEqual( + list( + init_step.get_compounds()[1][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.6176730474256593, 0.37859007619202606, 0.6065857814585477], + ) + self.assertEqual( + 22, init_step.get_compounds()[0][0].get_molecule().GetNumAtoms() + ) + + def test_embed_with_RDkit_protonation(self): + step_conf = { + _SBE.STEPID: "01_embed_molecule", + _SBE.STEP_TYPE: _SBE.STEP_EMBEDDING, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _SEE.RDKIT_PROTONATE: True, + _SEE.METHOD: _SEE.METHOD_RDKIT, + } + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: self._SMI_path, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SMI, + } + ] + }, + } + init_step = StepEmbedding(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 2) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 0) + + self.assertListEqual( + list( + init_step.get_compounds()[0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.9314762660385534, 0.06628711293694872, 4.923008037397455], + ) + self.assertListEqual( + list( + init_step.get_compounds()[1][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.6176730474256593, 0.37859007619202606, 0.6065857814585477], + ) + self.assertEqual( + 41, init_step.get_compounds()[0][0].get_molecule().GetNumAtoms() + ) + self.assertListEqual( + list( + init_step.get_compounds()[0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[40] + ), + [-3.576148794943472, -0.8051119546399829, -0.9424118920903588], + ) diff --git a/tests/io/test_initialize_compound.py b/tests/io/test_initialize_compound.py new file mode 100644 index 0000000..ea2e49f --- /dev/null +++ 
b/tests/io/test_initialize_compound.py @@ -0,0 +1,240 @@ +import unittest + +from icolos.core.workflow_steps.io.initialize_compound import StepInitializeCompound +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars + +_SBE = StepBaseEnum + + +class Test_InitializeCompound(unittest.TestCase): + @classmethod + def setUpClass(cls): + export_unit_test_env_vars() + + def setUp(self): + self._paracetamol_path = PATHS_EXAMPLEDATA.PARACETAMOL_PATH + self._SMI_path = PATHS_EXAMPLEDATA.MEDIUM_MOLECULES_SMI_PATH + self._JSON_path = PATHS_EXAMPLEDATA.SMALL_MOLECULES_JSON_PATH + self._CSV_path = PATHS_EXAMPLEDATA.SMALL_MOLECULES_CSV_PATH + self._CSV_path_semicolon = ( + PATHS_EXAMPLEDATA.SMALL_MOLECULES_CSV_PATH_DELIMITER_SEMICOLON + ) + + @classmethod + def tearDownClass(cls): + pass + + def test_initialize_compound_SDF(self): + step_conf = { + _SBE.STEPID: "01_load_molecule", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: self._paracetamol_path, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SDF, + } + ] + }, + } + init_step = StepInitializeCompound(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 1) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 1) + + self.assertListEqual( + list( + init_step.get_compounds()[0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-3.8276, -1.0625, 0.3279], + ) + + def test_initialize_compound_SMI(self): + step_conf = { + _SBE.STEPID: "01_load_molecule", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: self._SMI_path, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SMI, + } + ] + }, + } + init_step = StepInitializeCompound(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 2) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 0) + + self.assertEqual( + init_step.get_compounds()[0][0].get_smile(), + "CN(C)CCn1cc(c2ccc(F)cc2)c(n1)n3cccc3", + ) + self.assertEqual(init_step.get_compounds()[1].get_name(), "mol7") + + def test_initialize_compound_JSON(self): + step_conf = { + _SBE.STEPID: "01_load_molecule", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: self._JSON_path, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_JSON, + } + ] + }, + } + init_step = StepInitializeCompound(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 3) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 0) + + self.assertEqual( + init_step.get_compounds()[0][0].get_smile(), + "C#CCCCn1c(Cc2cc(OC)c(OC)c(OC)c2Cl)nc2c(N)ncnc21", + ) + self.assertEqual(init_step.get_compounds()[1].get_name(), "1") + + def test_initialize_compound_smile(self): + step_conf = { + _SBE.STEPID: "01_load_molecule", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "abc:CN(C)CCn1cc(c2ccc(F)cc2)c(n1)n3cccc3;CN(C)CCn1cc(c2ccc(F)cc2)c(n1)n3cccc3", + _SBE.INPUT_SOURCE_TYPE: 
_SBE.INPUT_SOURCE_TYPE_STRING, + } + ] + }, + } + init_step = StepInitializeCompound(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 2) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 0) + + self.assertEqual( + init_step.get_compounds()[0][0].get_smile(), + "CN(C)CCn1cc(c2ccc(F)cc2)c(n1)n3cccc3", + ) + self.assertEqual(init_step.get_compounds()[0].get_name(), "abc") + self.assertEqual(init_step.get_compounds()[1].get_name(), "1") + self.assertEqual(init_step.get_compounds()[1].get_compound_number(), 1) + + def test_initialize_compound_smile_enforceIDs(self): + step_conf = { + _SBE.STEPID: "01_load_molecule", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "abc:CN(C)CCn1cc(c2ccc(F)cc2)c(n1)n3cccc3;CN(C)CCn1cc(c2ccc(F)cc2)c(n1)n3cccc3", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STRING, + _SBE.INPUT_ENFORCE_IDS: { + _SBE.INPUT_ENFORCE_COMPOUND_IDS: ["3", 1], + _SBE.INPUT_ENFORCE_ENUMERATION_IDS: [10, 4], + }, + } + ], + }, + } + init_step = StepInitializeCompound(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 2) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 0) + + self.assertEqual( + init_step.get_compounds()[0][0].get_smile(), + "CN(C)CCn1cc(c2ccc(F)cc2)c(n1)n3cccc3", + ) + self.assertEqual(init_step.get_compounds()[0].get_name(), "abc") + self.assertEqual(init_step.get_compounds()[1].get_name(), "1") + self.assertEqual(init_step.get_compounds()[0].get_compound_number(), 3) + self.assertEqual(init_step.get_compounds()[1][0].get_enumeration_id(), 4) + + def test_initialize_compound_CSV(self): + step_conf = { + _SBE.STEPID: "01_load_molecule", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: self._CSV_path, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_CSV_COLUMNS: { + _SBE.INPUT_CSV_SMILES_COLUMN: "SMILES" + }, + _SBE.INPUT_FORMAT: _SBE.FORMAT_CSV, + } + ] + }, + } + init_step = StepInitializeCompound(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 3) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 0) + + self.assertEqual( + init_step.get_compounds()[0][0].get_smile(), "COc1cc2sc(C)nc2cc1OC" + ) + self.assertEqual(init_step.get_compounds()[1].get_name(), "1") + + def test_initialize_compound_CSV_extended_options(self): + step_conf = { + _SBE.STEPID: "01_load_molecule", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: self._CSV_path_semicolon, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_CSV_DELIMITER: ";", + _SBE.INPUT_CSV_COLUMNS: { + _SBE.INPUT_CSV_SMILES_COLUMN: "SMILES", + _SBE.INPUT_CSV_NAMES_COLUMN: "name", + }, + _SBE.INPUT_FORMAT: _SBE.FORMAT_CSV, + } + ] + }, + } + init_step = StepInitializeCompound(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 3) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 0) + self.assertEqual(len(init_step.get_compounds()[2]), 2) + + self.assertEqual( + 
init_step.get_compounds()[0][0].get_smile(), "COc1cc2sc(C)nc2cc1OC" + ) + self.assertEqual(init_step.get_compounds()[1].get_name(), "mol2_a") diff --git a/tests/panther/__init__.py b/tests/panther/__init__.py new file mode 100644 index 0000000..230e18e --- /dev/null +++ b/tests/panther/__init__.py @@ -0,0 +1 @@ +from tests.panther.test_panther import * diff --git a/tests/panther/test_panther.py b/tests/panther/test_panther.py new file mode 100644 index 0000000..3359fa1 --- /dev/null +++ b/tests/panther/test_panther.py @@ -0,0 +1,48 @@ +from icolos.utils.enums.program_parameters import PantherEnum +import os +import unittest +from tests.tests_paths import PATHS_EXAMPLEDATA, MAIN_CONFIG + +from icolos.utils.enums.step_enums import StepBaseEnum, StepPantherEnum +from icolos.core.workflow_steps.calculation.panther import StepPanther +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SPE = StepPantherEnum() +_PE = PantherEnum() + + +class Test_Panther(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/panther") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def test_panther_run(self): + step_conf = { + _SBE.STEPID: "01_panther", + _SBE.STEP_TYPE: _SBE.STEP_PANTHER, + _SBE.EXEC: {}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: { + _SPE.PANTHER_LOCATION: MAIN_CONFIG["PANTHER_LOCATION"], + _SPE.PANTHER_CONFIG_FILE: attach_root_path( + PATHS_EXAMPLEDATA.PANTHER_CONFIG + ), + _SPE.FIELDS: { + "1-Pdb file": attach_root_path( + PATHS_EXAMPLEDATA.PANTHER_RECEPTOR_PDB + ) + }, + } + }, + } + panther_step = StepPanther(**step_conf) + panther_step.execute() + + # check we get the negative image back + out_path = os.path.join(self._test_dir, "neg_image.mol2") + panther_step.write_generic_by_extension(self._test_dir, "mol2") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 6044) diff --git a/tests/pmx/__init__.py b/tests/pmx/__init__.py new file mode 100644 index 0000000..b2e888a --- /dev/null +++ b/tests/pmx/__init__.py @@ -0,0 +1,11 @@ +from tests.pmx.test_prepare_simulations import * +from tests.pmx.test_analyse import * +from tests.pmx.test_prepare_transitions import * +from tests.pmx.test_atomMapping import * +from tests.pmx.test_ligandHybrid import * +from tests.pmx.test_box_water_ions import * +from tests.pmx.test_setup_workpath import * +from tests.pmx.test_assemble_systems import * + +# from tests.pmx.test_run_simulations import * +# from tests.pmx.test_abfe import * diff --git a/tests/pmx/test_abfe.py b/tests/pmx/test_abfe.py new file mode 100644 index 0000000..3d64edd --- /dev/null +++ b/tests/pmx/test_abfe.py @@ -0,0 +1,62 @@ +import unittest +import os +from icolos.core.containers.generic import GenericData +from icolos.core.workflow_steps.pmx.abfe import StepPMXabfe +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.composite_agents.workflow import WorkFlow +import shutil + +_SBE = StepBaseEnum + + +class Test_PMXabfe(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/abfe") + if os.path.isdir(cls._test_dir): + shutil.rmtree(cls._test_dir) + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(PATHS_EXAMPLEDATA.PMX_ABFE_INPUT_COMPLEX, "r") as f: + data = f.read() + self.protein = 
GenericData(file_name="complex.pdb", file_data=data) + + def test_pmx_abfe(self): + step_conf = { + _SBE.STEPID: "01_PMX_ABFE", + _SBE.STEP_TYPE: _SBE.STEP_PMX_ABFE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: {_SBE.SETTINGS_ARGUMENTS_FLAGS: ["--build"]}, + _SBE.SETTINGS_ADDITIONAL: { + # settings for protein parametrisation + "forcefield": "amber03", + "water": "tip3p", + }, + }, + } + + step_pmx_abfe = StepPMXabfe(**step_conf) + step_pmx_abfe.data.generic.add_file(self.protein) + + step_pmx_abfe.work_dir = self._test_dir + step_pmx_abfe._workflow_object = WorkFlow() + step_pmx_abfe.execute() + + self.assertEqual( + os.path.isfile(os.path.join(self._test_dir, "complex/genion.tpr")), True + ) + + stat_inf = os.stat(os.path.join(self._test_dir, "protein.gro")) + self.assertGreater(stat_inf.st_size, 70000) diff --git a/tests/pmx/test_analyse.py b/tests/pmx/test_analyse.py new file mode 100644 index 0000000..dba50ee --- /dev/null +++ b/tests/pmx/test_analyse.py @@ -0,0 +1,65 @@ +import unittest +import os +from icolos.core.workflow_steps.pmx.run_analysis import StepPMXRunAnalysis +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + create_test_dir, + MAIN_CONFIG, + export_unit_test_env_vars, + get_ligands_as_compounds_with_conformers, +) +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.core.containers.perturbation_map import PerturbationMap + +_SBE = StepBaseEnum + + +class Test_PMXanalyse(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/analyse") + # + create_test_dir(PATHS_EXAMPLEDATA.RUN_ANALYSIS_TEST_DIR, cls._test_dir) + + def setUp(self): + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + p_map = PerturbationMap(compounds=self.compounds) + p_map.parse_map_file(file_path=PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG_SINGLE_EDGE) + self.p_map = p_map + export_unit_test_env_vars() + + def test_pmx_analyse(self): + step_conf = { + _SBE.STEPID: "prepare_simulations", + _SBE.STEP_TYPE: "pmx_analyse", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a", + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["PMX"]["CLI_ENTRYPOINT"], + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + } + step_pmx_analyse = StepPMXRunAnalysis(**step_conf) + step_pmx_analyse.work_dir = self._test_dir + step_pmx_analyse._workflow_object = WorkFlow() + step_pmx_analyse._workflow_object.workflow_data.perturbation_map = self.p_map + step_pmx_analyse.execute() + + stat_inf = os.stat( + os.path.join(self._test_dir, "0cd4b47_4f2ffa1/protein/analyse1/results.txt") + ) + + self.assertGreater(stat_inf.st_size, 19000) + + stat_inf = os.stat(os.path.join(self._test_dir, "resultsAll.csv")) + + self.assertGreater(stat_inf.st_size, 480) diff --git a/tests/pmx/test_assemble_systems.py b/tests/pmx/test_assemble_systems.py new file mode 100644 index 0000000..a424315 --- /dev/null +++ b/tests/pmx/test_assemble_systems.py @@ -0,0 +1,77 @@ +import unittest +import os +from 
icolos.core.workflow_steps.pmx.assemble_systems import StepPMXAssembleSystems +from icolos.core.containers.generic import GenericData +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + MAIN_CONFIG, + create_test_dir, + export_unit_test_env_vars, + get_ligands_as_compounds_with_conformers, +) +from icolos.core.containers.perturbation_map import PerturbationMap +from icolos.utils.general.files_paths import attach_root_path +from icolos.utils.enums.program_parameters import PMXEnum, PMXAtomMappingEnum + +_SBE = StepBaseEnum +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class Test_PMXAssembleSystems(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/test_assemble_systems") + + create_test_dir(PATHS_EXAMPLEDATA.ASSEMBLE_SYSTEMS_TEST_DIR, cls._test_dir) + + def setUp(self): + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + p_map = PerturbationMap(compounds=self.compounds) + p_map.parse_map_file(file_path=PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG_SINGLE_EDGE) + self.p_map = p_map + with open(PATHS_EXAMPLEDATA.FEP_PLUS_OTHER_PROTEIN, "r") as f: + data = f.read() + self.protein = GenericData(file_name="protein.pdb", file_data=data) + + export_unit_test_env_vars() + + def test_assembleSystems(self): + + step_conf = { + _SBE.STEPID: "assemble_systems", + _SBE.STEP_TYPE: "pmx_assemble_systems", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["PMX"]["CLI_ENTRYPOINT"], + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + } + + step_assembleSystems = StepPMXAssembleSystems(**step_conf) + step_assembleSystems.work_dir = self._test_dir + step_assembleSystems._workflow_object = WorkFlow() + step_assembleSystems._workflow_object.workflow_data.perturbation_map = ( + self.p_map + ) + step_assembleSystems.data.generic.add_file(self.protein) + step_assembleSystems.execute() + + stat_inf = os.stat( + os.path.join(self._test_dir, "0cd4b47_4f2ffa1/hybridStrTop/ffmerged.itp") + ) + self.assertEqual(stat_inf.st_size, 1695) diff --git a/tests/pmx/test_atomMapping.py b/tests/pmx/test_atomMapping.py new file mode 100644 index 0000000..2ea88ae --- /dev/null +++ b/tests/pmx/test_atomMapping.py @@ -0,0 +1,67 @@ +import unittest +import os +from icolos.core.workflow_steps.pmx.atomMapping import StepPMXatomMapping +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + create_test_dir, + export_unit_test_env_vars, + get_ligands_as_compounds_with_conformers, +) +from icolos.core.containers.perturbation_map import PerturbationMap +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.general.files_paths import attach_root_path +from icolos.utils.enums.program_parameters import PMXEnum, PMXAtomMappingEnum + +_SBE = StepBaseEnum +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class Test_PMXatomMapping(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/test_atomMapping") + 
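+        # create_test_dir presumably seeds the junk dir with the pre-built
+        # atomMapping fixtures the step expects to find in its work_dir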
create_test_dir(PATHS_EXAMPLEDATA.ATOM_MAPPING_TEST_DIR, cls._test_dir) + + def setUp(self): + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + p_map = PerturbationMap(compounds=self.compounds) + p_map.parse_map_file(file_path=PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG_SINGLE_EDGE) + self.p_map = p_map + + export_unit_test_env_vars() + + def test_atomMapping(self): + + step_conf = { + _SBE.STEPID: "atommapping", + _SBE.STEP_TYPE: "pmx_atommapping", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + } + + step_atom_mapping = StepPMXatomMapping(**step_conf) + step_atom_mapping.work_dir = self._test_dir + step_atom_mapping._workflow_object = WorkFlow() + step_atom_mapping._workflow_object.workflow_data.perturbation_map = self.p_map + step_atom_mapping.execute() + + stat_inf = os.stat( + os.path.join(self._test_dir, "0cd4b47_4f2ffa1/hybridStrTop/out_pdb1.pdb") + ) + self.assertEqual(stat_inf.st_size, 4631) diff --git a/tests/pmx/test_box_water_ions.py b/tests/pmx/test_box_water_ions.py new file mode 100644 index 0000000..fa9199d --- /dev/null +++ b/tests/pmx/test_box_water_ions.py @@ -0,0 +1,70 @@ +import unittest +import os +from icolos.core.workflow_steps.pmx.box_water_ions import StepPMXBoxWaterIons +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + create_test_dir, + export_unit_test_env_vars, + get_ligands_as_compounds_with_conformers, + MAIN_CONFIG, +) +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.containers.perturbation_map import PerturbationMap + +_SBE = StepBaseEnum + + +class Test_PMXBoxWaterIons(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/test_box_water_ions") + + create_test_dir(PATHS_EXAMPLEDATA.BOX_WATER_IONS_TEST_DIR, cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + # initialise the map object for the two test ligands + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + p_map = PerturbationMap(compounds=self.compounds) + p_map.parse_map_file(file_path=PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG_SINGLE_EDGE) + self.p_map = p_map + + # def tearDown(self): + # shutil.rmtree(self._test_dir) + + def test_box_water_ions(self): + conf = { + _SBE.STEPID: "01_PMX_BOX_WATER_IONS", + _SBE.STEP_TYPE: _SBE.STEP_PMX_BOX_WATER_IONS, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["PMX"]["CLI_ENTRYPOINT"], + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: {}, + }, + } + step = StepPMXBoxWaterIons(**conf) + step.data.compounds = self.compounds + step.work_dir = self._test_dir + step._workflow_object = WorkFlow() + step._workflow_object.workflow_data.perturbation_map = self.p_map + step.execute() + stat_inf = os.stat( + os.path.join(self._test_dir, "0cd4b47_4f2ffa1/water/tpr.tpr") + ) + 
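+        # the water-leg tpr checked here and the much larger protein-leg tpr
+        # checked below confirm both legs of the edge were boxed and solvated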
self.assertGreater(stat_inf.st_size, 212100) + + stat_inf = os.stat( + os.path.join(self._test_dir, "0cd4b47_4f2ffa1/protein/tpr.tpr") + ) + self.assertGreater(stat_inf.st_size, 3505650) diff --git a/tests/pmx/test_doublebox.py b/tests/pmx/test_doublebox.py new file mode 100644 index 0000000..c969296 --- /dev/null +++ b/tests/pmx/test_doublebox.py @@ -0,0 +1,20 @@ +import unittest +import os +from tests.tests_paths import export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + + +class Test_PMXdoublebox(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/doublebox") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + pass + + def test_XYZ(self): + pass diff --git a/tests/pmx/test_genlib.py b/tests/pmx/test_genlib.py new file mode 100644 index 0000000..a8185bb --- /dev/null +++ b/tests/pmx/test_genlib.py @@ -0,0 +1,20 @@ +import unittest +import os +from tests.tests_paths import export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + + +class Test_PMXgenlib(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/genlib") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + pass + + def test_XYZ(self): + pass diff --git a/tests/pmx/test_gentop.py b/tests/pmx/test_gentop.py new file mode 100644 index 0000000..0384648 --- /dev/null +++ b/tests/pmx/test_gentop.py @@ -0,0 +1,20 @@ +import unittest +import os +from tests.tests_paths import export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + + +class Test_PMXgentop(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/gentop") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + pass + + def test_XYZ(self): + pass diff --git a/tests/pmx/test_ligandHybrid.py b/tests/pmx/test_ligandHybrid.py new file mode 100644 index 0000000..815b20c --- /dev/null +++ b/tests/pmx/test_ligandHybrid.py @@ -0,0 +1,70 @@ +import unittest +import os +from icolos.core.workflow_steps.pmx.ligandHybrid import StepPMXligandHybrid +from icolos.core.containers.perturbation_map import PerturbationMap +from icolos.utils.enums.program_parameters import PMXEnum, PMXLigandHybridEnum +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + create_test_dir, + get_ligands_as_compounds_with_conformers, + export_unit_test_env_vars, +) +from icolos.utils.general.files_paths import attach_root_path + + +_SBE = StepBaseEnum +_PE = PMXEnum() +_PHE = PMXLigandHybridEnum() + + +class Test_PMXligandHybrid(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/test_ligandHybrid") + # if not os.path.isdir(cls._test_dir): + # os.makedirs(cls._test_dir) + create_test_dir(PATHS_EXAMPLEDATA.LIGAND_HYBRID_TEST_DIR, cls._test_dir) + + def setUp(self): + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + p_map = PerturbationMap(compounds=self.compounds) + p_map.parse_map_file(file_path=PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG_SINGLE_EDGE) + self.p_map = p_map + + export_unit_test_env_vars() + + # def tearDown(self): 
+ # shutil.rmtree(self._test_dir) + + def test_build_hybrid_topology_and_structure(self): + merged_itp_path = os.path.join( + self._test_dir, "0cd4b47_4f2ffa1/hybridStrTop/merged.itp" + ) + + step_conf = { + _SBE.STEPID: "ligand_hybrid", + _SBE.STEP_TYPE: "pmx_ligandHybrid", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + } + + step_ligand_hybrid = StepPMXligandHybrid(**step_conf) + step_ligand_hybrid.work_dir = self._test_dir + step_ligand_hybrid._workflow_object = WorkFlow() + step_ligand_hybrid._workflow_object.workflow_data.perturbation_map = self.p_map + step_ligand_hybrid.execute() + + stat_inf = os.stat(merged_itp_path) + self.assertEqual(stat_inf.st_size, 39468) diff --git a/tests/pmx/test_mutate.py b/tests/pmx/test_mutate.py new file mode 100644 index 0000000..3c77e55 --- /dev/null +++ b/tests/pmx/test_mutate.py @@ -0,0 +1,20 @@ +import unittest +import os +from tests.tests_paths import export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + + +class Test_PMXmutate(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/mutate") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + pass + + def test_XYZ(self): + pass diff --git a/tests/pmx/test_prepare_simulations.py b/tests/pmx/test_prepare_simulations.py new file mode 100644 index 0000000..9ab1ef0 --- /dev/null +++ b/tests/pmx/test_prepare_simulations.py @@ -0,0 +1,73 @@ +import unittest +import os +from icolos.core.workflow_steps.pmx.prepare_simulations import StepPMXPrepareSimulations +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.core.composite_agents.workflow import WorkFlow +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + MAIN_CONFIG, + export_unit_test_env_vars, + create_test_dir, + get_ligands_as_compounds_with_conformers, +) +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.containers.perturbation_map import PerturbationMap + +_SBE = StepBaseEnum + + +class Test_PMXPrepareSimulations(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/test_prepare_simulations") + + create_test_dir(PATHS_EXAMPLEDATA.PREPARE_SIMULATIONS_TEST_DIR, cls._test_dir) + + def setUp(self): + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + p_map = PerturbationMap(compounds=self.compounds) + p_map.parse_map_file(file_path=PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG_SINGLE_EDGE) + self.p_map = p_map + + export_unit_test_env_vars() + + def test_prepare_simulations(self): + + step_conf = { + _SBE.STEPID: "prepare_simulations", + _SBE.STEP_TYPE: "pmx_prepare_simulations", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["PMX"]["CLI_ENTRYPOINT"], + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {"sim_type": "em"}, + }, + } + + step_prepare_simulations = StepPMXPrepareSimulations(**step_conf) + step_prepare_simulations.work_dir = self._test_dir + step_prepare_simulations._workflow_object = 
WorkFlow() + step_prepare_simulations._workflow_object.workflow_data.perturbation_map = ( + self.p_map + ) + step_prepare_simulations.execute() + + stat_inf = os.stat( + os.path.join(self._test_dir, "0cd4b47_4f2ffa1/water/stateA/run1/em/tpr.tpr") + ) + + self.assertGreater(stat_inf.st_size, 213300) + + stat_inf = os.stat( + os.path.join( + self._test_dir, "0cd4b47_4f2ffa1/protein/stateB/run3/em/tpr.tpr" + ) + ) + self.assertGreater(stat_inf.st_size, 3501000) diff --git a/tests/pmx/test_prepare_transitions.py b/tests/pmx/test_prepare_transitions.py new file mode 100644 index 0000000..9d4d13f --- /dev/null +++ b/tests/pmx/test_prepare_transitions.py @@ -0,0 +1,95 @@ +import unittest +import os +from icolos.core.workflow_steps.pmx.prepare_transitions import StepPMXPrepareTransitions +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + create_test_dir, + MAIN_CONFIG, + export_unit_test_env_vars, + get_ligands_as_compounds_with_conformers, +) +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.containers.perturbation_map import PerturbationMap + +_SBE = StepBaseEnum + + +class Test_PMXPrepareTransitions(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/prepare_transitions") + create_test_dir(PATHS_EXAMPLEDATA.PREPARE_TRANSITIONS_TEST_DIR, cls._test_dir) + export_unit_test_env_vars() + + def setUp(self): + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + p_map = PerturbationMap(compounds=self.compounds) + p_map.parse_map_file( + file_path=PATHS_EXAMPLEDATA.PMX_FEP_MAP_LOG_PREPARE_TRANSITIONS + ) + p_map.replicas = 1 + self.p_map = p_map + + def test_pmx_prepare_transitions(self): + + step_conf = { + _SBE.STEPID: "prepare_transitions", + _SBE.STEP_TYPE: "pmx_prepare_transitions", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["PMX"]["CLI_ENTRYPOINT"], + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {"sim_type": "transitions"}, + }, + } + + step_prep_trans = StepPMXPrepareTransitions(**step_conf) + step_prep_trans.work_dir = self._test_dir + step_prep_trans._workflow_object = WorkFlow() + step_prep_trans._workflow_object.workflow_data.perturbation_map = self.p_map + step_prep_trans.execute() + + stat_inf = os.stat( + os.path.join( + self._test_dir, + "4f2ffa1_bd688d5/protein/stateA/run1/transitions/frame1.gro", + ) + ) + self.assertGreater(stat_inf.st_size, 6159200) + + stat_inf = os.stat( + os.path.join( + self._test_dir, + "4f2ffa1_bd688d5/protein/stateB/run1/transitions/frame1.gro", + ) + ) + self.assertGreater(stat_inf.st_size, 6159200) + + stat_inf = os.stat( + os.path.join( + self._test_dir, + "4f2ffa1_bd688d5/water/stateA/run1/transitions/frame1.gro", + ) + ) + self.assertGreater(stat_inf.st_size, 887000) + + stat_inf = os.stat( + os.path.join( + self._test_dir, + "4f2ffa1_bd688d5/water/stateB/run1/transitions/frame1.gro", + ) + ) + self.assertGreater(stat_inf.st_size, 887000) diff --git a/tests/pmx/test_run_simulations.py b/tests/pmx/test_run_simulations.py new file mode 100644 index
0000000..48d6bde --- /dev/null +++ b/tests/pmx/test_run_simulations.py @@ -0,0 +1,113 @@ +import unittest +import os +from icolos.core.containers.generic import GenericData +from icolos.core.workflow_steps.pmx.run_simulations import StepPMXRunSimulations +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + create_test_dir, + MAIN_CONFIG, + export_unit_test_env_vars, + get_ligands_as_compounds_with_conformers, +) +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.containers.perturbation_map import PerturbationMap +from icolos.core.composite_agents.workflow import WorkFlow + +_SBE = StepBaseEnum + + +class Test_PMXRunSimulations(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/run_simulations_test_dir") + create_test_dir(PATHS_EXAMPLEDATA.RUN_SIMULATIONS_TEST_DIR, cls._test_dir) + + def setUp(self): + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + with open(PATHS_EXAMPLEDATA.FEP_PLUS_OTHER_PROTEIN, "r") as f: + data = f.read() + self.protein = GenericData(file_name="protein.pdb", file_data=data) + p_map = PerturbationMap(compounds=self.compounds, protein=self.protein) + p_map.parse_map_file(file_path=PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG_MIN) + self.p_map = p_map + export_unit_test_env_vars() + + def test_run_simulations(self): + step_conf = { + _SBE.STEPID: "run_simulations", + _SBE.STEP_TYPE: "pmx_run_simulations", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["PMX"]["CLI_ENTRYPOINT"], + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {"sim_type": "em"}, + }, + } + + step_run_simulations = StepPMXRunSimulations(**step_conf) + step_run_simulations.work_dir = self._test_dir + step_run_simulations._workflow_object = WorkFlow() + step_run_simulations.get_workflow_object().workflow_data.perturbation_map = ( + self.p_map + ) + step_run_simulations.execute() + + stat_inf = os.stat( + os.path.join(self._test_dir, "0cd4b47_4f2ffa1/water/stateB/run3/em/md.log") + ) + + self.assertEqual(stat_inf.st_size, 1347767) + + stat_inf = os.stat( + os.path.join( + self._test_dir, "0cd4b47_4f2ffa1/protein/stateB/run3/em/tpr.tpr" + ) + ) + self.assertEqual(stat_inf.st_size, 3501084) + + # def test_run_simulations_parallel(self): + # step_conf = { + # _SBE.STEPID: "run_simulations", + # _SBE.STEP_TYPE: "pmx_run_simulations", + # _SBE.EXEC: { + # _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a", + # _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["PMX"]["CLI_ENTRYPOINT"], + # _SBE.EXEC_PARALLELIZATION: { + # _SBE.EXEC_PARALLELIZATION_CORES: 2 + # } + # }, + # _SBE.SETTINGS: { + # _SBE.SETTINGS_ARGUMENTS: { + # _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + # _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + # }, + # _SBE.SETTINGS_ADDITIONAL: {"sim_type": "em"}, + # }, + # } + + # step_run_simulations = StepPMXRunSimulations(**step_conf) + # step_run_simulations.work_dir = self._test_dir + # step_run_simulations._workflow_object = WorkFlow() + # step_run_simulations.get_workflow_object().perturbation_map = self.p_map + # step_run_simulations.execute() + + # stat_inf = os.stat( + # os.path.join(self._test_dir, "0cd4b47_4f2ffa1/water/stateB/run3/em/md.log") + # ) + + # 
self.assertEqual(stat_inf.st_size, 1347767) + + # stat_inf = os.stat( + # os.path.join( + # self._test_dir, "0cd4b47_4f2ffa1/protein/stateB/run3/em/tpr.tpr" + # ) + # ) + # self.assertEqual(stat_inf.st_size, 3501084) diff --git a/tests/pmx/test_setup_workpath.py b/tests/pmx/test_setup_workpath.py new file mode 100644 index 0000000..2d9c96d --- /dev/null +++ b/tests/pmx/test_setup_workpath.py @@ -0,0 +1,80 @@ +import unittest +import os +from icolos.core.containers.generic import GenericData +from icolos.core.workflow_steps.pmx.setup_workpath import StepPMXSetup +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + export_unit_test_env_vars, + get_ligands_as_compounds_with_conformers, +) +from icolos.utils.general.files_paths import attach_root_path +import shutil +from icolos.core.composite_agents.workflow import WorkFlow + +_SBE = StepBaseEnum + + +class Test_PMX_setup(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/test_setupWorkpath") + if os.path.exists(cls._test_dir): + shutil.rmtree(cls._test_dir) + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + with open(PATHS_EXAMPLEDATA.FEP_PLUS_OTHER_PROTEIN, "r") as f: + data = f.read() + self.protein = GenericData(file_name="protein.pdb", file_data=data) + with open(PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG, "r") as f: + data = f.read() + self.log_file = GenericData( + file_name="map.log", file_data=data, extension="log" + ) + + def test_setup_workpath(self): + step_conf = { + _SBE.STEPID: "01_PMX_SETUP", + _SBE.STEP_TYPE: _SBE.STEP_PMX_SETUP, + _SBE.EXEC: { + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + } + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: { + # settings for protein parametrisation + "forcefield": "amber03", + "water": "tip3p", + }, + }, + } + + step_setup = StepPMXSetup(**step_conf) + step_setup.data.compounds = self.compounds + step_setup.data.generic.add_file(self.protein) + step_setup.data.generic.add_file(self.log_file) + step_setup.data.generic.add_file( + GenericData( + file_name="mdp_files", + extension="mdp", + file_data=PATHS_EXAMPLEDATA.PMX_MDP_FILES, + ) + ) + step_setup.work_dir = self._test_dir + step_setup._workflow_object = WorkFlow() + step_setup.execute() + + assert os.path.isdir(os.path.join(self._test_dir, "input")) + assert os.path.isdir( + os.path.join(self._test_dir, "0cd4b47_4f2ffa1/water/stateA/run1/em") + ) + # stat some of the ligand files and check they've been deposited in the right directory diff --git a/tests/prediction/__init__.py b/tests/prediction/__init__.py new file mode 100644 index 0000000..84e4ecc --- /dev/null +++ b/tests/prediction/__init__.py @@ -0,0 +1,3 @@ +from tests.prediction.test_predictor import * +from tests.prediction.test_model_building import * +from tests.prediction.test_active_learning import * diff --git a/tests/prediction/test_active_learning.py b/tests/prediction/test_active_learning.py new file mode 100644 index 0000000..3ff5796 --- /dev/null +++ b/tests/prediction/test_active_learning.py @@ -0,0 +1,100 @@ +import unittest +from icolos.core.workflow_steps.prediction.active_learning import StepActiveLearning +from icolos.utils.enums.program_parameters import GlideEnum +import os +from icolos.utils.enums.step_enums 
import ( + StepActiveLearningEnum, + StepBaseEnum, + StepGlideEnum, +) +from icolos.utils.general.files_paths import attach_root_path +from tests.tests_paths import PATHS_1UYD, PATHS_EXAMPLEDATA + +_SBE = StepBaseEnum +_EE = GlideEnum() +_SGE = StepGlideEnum() +_SALE = StepActiveLearningEnum() + + +class TestActiveLearning(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/active_learning") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + self.ligands = PATHS_1UYD.LIGANDS + self.receptor_path = PATHS_1UYD.GRID_PATH + self.receptor_constraints_path = PATHS_1UYD.GRID_CONSTRAINTS_PATH + self.receptor_path_COX2 = PATHS_EXAMPLEDATA.PRIME_COX2_GRID + + @classmethod + def tearDownClass(cls): + pass + + def test_active_learning_docking(self): + step_conf = { + _SBE.STEPID: "01_active_learning", + _SBE.STEP_TYPE: _SBE.STEP_ACTIVE_LEARNING, + _SBE.EXEC: {}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: { + _SALE.VIRTUAL_LIB: self.ligands, + _SALE.N_ROUNDS: "2", + _SALE.INIT_SAMPLES: "2", + _SALE.BATCH_SIZE: "4", + _SALE.CRITERIA: _SGE.GLIDE_DOCKING_SCORE, + # config for embedding + docking + _SALE.ORACLE_CONFIG: [ + { + _SBE.STEPID: "01_glide", + _SBE.STEP_TYPE: _SBE.STEP_GLIDE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + _SBE.EXEC_FAILUREPOLICY: { + _SBE.EXEC_FAILUREPOLICY_NTRIES: 1 + }, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _EE.GLIDE_HOST: "cpu-only" + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _EE.GLIDE_AMIDE_MODE: "trans", + _EE.GLIDE_EXPANDED_SAMPLING: "True", + _EE.GLIDE_GRIDFILE: [self.receptor_path], + _EE.GLIDE_NENHANCED_SAMPLING: "1", + _EE.GLIDE_POSE_OUTTYPE: _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB, + _EE.GLIDE_POSES_PER_LIG: "3", + _EE.GLIDE_POSTDOCK_NPOSE: "25", + _EE.GLIDE_POSTDOCKSTRAIN: "True", + _EE.GLIDE_PRECISION: "SP", + _EE.GLIDE_REWARD_INTRA_HBONDS: "True", + } + }, + }, + }, + ], + }, + }, + } + + step_active_learning = StepActiveLearning(**step_conf) + step_active_learning.execute() + out_path = os.path.join(self._test_dir, "production_model.pkl") + data = step_active_learning.data.generic.get_files_by_extension(ext="pkl")[ + 0 + ].get_data() + with open(out_path, "wb") as f: + f.write(data) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 348000) diff --git a/tests/prediction/test_model_building.py b/tests/prediction/test_model_building.py new file mode 100644 index 0000000..651583a --- /dev/null +++ b/tests/prediction/test_model_building.py @@ -0,0 +1,91 @@ +import json +import unittest +import os + +from icolos.core.workflow_steps.prediction.model_building import StepModelBuilder +from icolos.utils.enums.program_parameters import ModelBuilderEnum + +from icolos.utils.enums.step_enums import StepBaseEnum, StepModelBuilderEnum + +from tests.tests_paths import PATHS_EXAMPLEDATA, load_SDF_docked, MAIN_CONFIG +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SME = ModelBuilderEnum() +_SMBE = StepModelBuilderEnum() + + +class Test_Model_Building(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/model_building") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + 
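# load the example Optuna_AZ build configuration (JSON) and the docked compounds used as training input +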
self._example_JSON = PATHS_EXAMPLEDATA.MODEL_BUILDER_EXAMPLE_JSON + self._compounds = load_SDF_docked( + PATHS_EXAMPLEDATA.MODEL_BUILDER_TEST_INPUT_SDF + ) + + @classmethod + def tearDownClass(cls): + pass + + def test_build_model(self): + step_conf = { + _SBE.STEPID: "01_model_building", + _SBE.STEP_TYPE: _SBE.STEP_PREDICTION, + _SBE.EXEC: { + _SBE.EXEC_BINARYLOCATION: " ".join( + [ + MAIN_CONFIG["OPTUNA_AZ"]["ENVIRONMENT_PYTHON"], + MAIN_CONFIG["OPTUNA_AZ"]["ENTRY_POINT_LOCATION"], + ] + ) + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _SME.CONFIG: self._example_JSON, + _SME.BEST_BUILDCONFIG_OUTPATH: os.path.join( + self._test_dir, "buildconfig.json" + ), + _SME.BEST_MODEL_OUTPATH: os.path.join( + self._test_dir, "best_model_trial.pkl" + ), + _SME.MERGED_MODEL_OUTPATH: os.path.join( + self._test_dir, "production_model.pkl" + ), + } + }, + _SBE.SETTINGS_ADDITIONAL: { + _SMBE.DATA: { + _SMBE.DATA_INPUT_COLUMN: "original_smiles", + _SMBE.DATA_RESPONSE_COLUMN: _SBE.ANNOTATION_TAG_DOCKING_SCORE, + } + }, + }, + } + model_step = StepModelBuilder(**step_conf) + model_step.data.compounds = self._compounds + + model_step.execute() + + # check, that the input data has been written as expected + out_path = os.path.join(self._test_dir, "best_param.json") + container = model_step.data.generic.get_files_by_extension(ext="json")[0] + with open(out_path, "w") as f: + json.dump(container.get_data(), f, indent=4) + stat_inf = os.stat(out_path) + self.assertEqual(_SMBE.TMP_OUTPUT_BEST_PARAMETERS, container.get_file_name()) + self.assertGreater(stat_inf.st_size, 800) + + # check, that a model has been produced + # note, that the model's size strongly depends on the underlying algorithm / hyper-parameters chosen + out_path = os.path.join(self._test_dir, "production_model.pkl") + data = model_step.data.generic.get_files_by_extension(ext="pkl")[0].get_data() + with open(out_path, "wb") as f: + f.write(data) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 5000) diff --git a/tests/prediction/test_predictor.py b/tests/prediction/test_predictor.py new file mode 100644 index 0000000..096fd97 --- /dev/null +++ b/tests/prediction/test_predictor.py @@ -0,0 +1,68 @@ +import unittest +import os + +from icolos.core.containers.compound import Compound, Enumeration +from icolos.core.workflow_steps.prediction.predictor import StepPredictor + +from icolos.utils.enums.step_enums import StepBaseEnum, StepPredictorEnum + +from tests.tests_paths import PATHS_EXAMPLEDATA, get_mol_as_Conformer +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SPE = StepPredictorEnum() + + +class Test_Predictor(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/Prediction") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + self._example_model_path = attach_root_path(PATHS_EXAMPLEDATA.EPSA_MODEL_PATH) + self._example_mol_path = attach_root_path( + PATHS_EXAMPLEDATA.EPSA_EXAMPLE_MOLECULE + ) + + @classmethod + def tearDownClass(cls): + pass + + def test_predict_ePSA_with_descriptors(self): + step_conf = { + _SBE.STEPID: "01_predict_ePSA", + _SBE.STEP_TYPE: _SBE.STEP_PREDICTION, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: {_SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}}, + _SBE.SETTINGS_ADDITIONAL: { + _SPE.MODEL_PATH: self._example_model_path, + _SPE.FEATURES: [ + "bf_weighted_volume_boltzfactor_dmso", + "bf_weighted_area_boltzfactor_dmso", + 
"bf_weighted_HB_acc_boltzfactor_dmso", + "bf_weighted_HB_don_boltzfactor_dmso", + "bf_weighted_sigma2_boltzfactor_dmso", + "bf_weighted_Gsolv_meoh_boltzfactor_dmso", + ], + _SPE.NAME_PREDICTED: "pred_ePSA", + }, + }, + } + pred_step = StepPredictor(**step_conf) + pred_step.get_compounds().append(Compound()) + pred_step.get_compounds()[0].add_enumeration(Enumeration(), auto_update=True) + conformer = get_mol_as_Conformer(self._example_mol_path) + pred_step.data.compounds[0][0].add_conformers(conformer, auto_update=True) + pred_step.execute() + + self.assertEqual(len(pred_step.get_compounds()), 1) + self.assertEqual(len(pred_step.get_compounds()[0]), 1) + self.assertEqual(len(pred_step.get_compounds()[0][0]), 1) + + # check SDF write-out (including ePSA prediction as tag) + out_path = os.path.join(self._test_dir, "ePSA_predicted_annotated.sdf") + pred_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 4448) diff --git a/tests/rms_filter/__init__.py b/tests/rms_filter/__init__.py new file mode 100644 index 0000000..484bdf5 --- /dev/null +++ b/tests/rms_filter/__init__.py @@ -0,0 +1 @@ +from tests.rms_filter.test_rmsfilter import * diff --git a/tests/rms_filter/test_rmsfilter.py b/tests/rms_filter/test_rmsfilter.py new file mode 100644 index 0000000..7af7034 --- /dev/null +++ b/tests/rms_filter/test_rmsfilter.py @@ -0,0 +1,203 @@ +import unittest + +from icolos.core.containers.compound import Compound, Enumeration +from icolos.core.workflow_steps.calculation.rms_filter import StepRMSFilter +from icolos.utils.enums.step_enums import StepBaseEnum, StepRMSFilterEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, get_mol_as_Conformer + +_SBE = StepBaseEnum +_SRF = StepRMSFilterEnum() + + +class Test_RMSfilter(unittest.TestCase): + @classmethod + def setUpClass(cls): + pass + + def setUp(self): + pass + + @classmethod + def tearDownClass(cls): + pass + + def test_RMSfiltering_alignmol_descending(self): + step_conf = { + _SBE.STEPID: "01_RMSfiltering", + _SBE.STEP_TYPE: _SBE.STEP_RMSFILTER, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SRF.METHOD: _SRF.METHOD_ALIGNMOL, + _SRF.THRESHOLD: 1, + _SRF.ORDER_BY: "E_cosmo", + _SRF.ORDER_ASCENDING: False, + }, + }, + } + + rf_step = StepRMSFilter(**step_conf) + rf_step.get_compounds().append(Compound(compound_number=1)) + rf_step.get_compounds()[0].add_enumeration(Enumeration(), auto_update=True) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + rf_step.data.compounds[0][0].add_conformers(conformers, auto_update=True) + + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 11) + rf_step.execute() + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 7) + + self.assertListEqual( + list( + float( + rf_step.get_compounds()[0][0] + .get_conformers()[i] + .get_molecule() + .GetProp("E_cosmo") + ) + for i in range(7) + ), + [ + -943306.7731, + -943304.5548, + -943301.0009, + -943300.9934, + -943303.7802, + -943304.0485, + -943304.0517, + ], + ) + + step_conf[_SBE.SETTINGS][_SBE.SETTINGS_ADDITIONAL][_SRF.THRESHOLD] = 1.5 + rf_step = StepRMSFilter(**step_conf) + rf_step.get_compounds().append(Compound(compound_number=1)) + rf_step.get_compounds()[0].add_enumeration(Enumeration(), auto_update=True) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + rf_step.data.compounds[0][0].add_conformers(conformers, 
auto_update=True) + + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 11) + rf_step.execute() + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 3) + + def test_RMSfiltering_alignmol_ascending(self): + step_conf = { + _SBE.STEPID: "01_RMSfiltering", + _SBE.STEP_TYPE: _SBE.STEP_RMSFILTER, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SRF.METHOD: _SRF.METHOD_ALIGNMOL, + _SRF.THRESHOLD: 1, + _SRF.ORDER_BY: "E_cosmo", + _SRF.ORDER_ASCENDING: True, + }, + }, + } + + rf_step = StepRMSFilter(**step_conf) + rf_step.get_compounds().append(Compound(compound_number=1)) + rf_step.get_compounds()[0].add_enumeration(Enumeration(), auto_update=True) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + rf_step.data.compounds[0][0].add_conformers(conformers, auto_update=True) + + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 11) + rf_step.execute() + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 5) + + self.assertListEqual( + list( + float( + rf_step.get_compounds()[0][0] + .get_conformers()[i] + .get_molecule() + .GetProp("E_cosmo") + ) + for i in range(5) + ), + [-943304.5487, -943300.2823, -943303.7733, -943304.0485, -943304.0517], + ) + + def test_RMSfiltering_best(self): + step_conf = { + _SBE.STEPID: "01_RMSfiltering", + _SBE.STEP_TYPE: _SBE.STEP_RMSFILTER, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SRF.METHOD: _SRF.METHOD_BEST, + _SRF.THRESHOLD: 1, + _SRF.ORDER_BY: "E_cosmo", + _SRF.ORDER_ASCENDING: False, + }, + }, + } + + rf_step = StepRMSFilter(**step_conf) + rf_step.get_compounds().append(Compound(compound_number=1)) + rf_step.get_compounds()[0].add_enumeration(Enumeration(), auto_update=True) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + rf_step.data.compounds[0][0].add_conformers(conformers, auto_update=True) + + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 11) + rf_step.execute() + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 4) + + self.assertListEqual( + list( + float( + rf_step.get_compounds()[0][0] + .get_conformers()[i] + .get_molecule() + .GetProp("E_cosmo") + ) + for i in range(4) + ), + [-943306.7731, -943304.5548, -943301.0009, -943304.0517], + ) + + def test_RMSfiltering_best_notordered(self): + step_conf = { + _SBE.STEPID: "01_RMSfiltering", + _SBE.STEP_TYPE: _SBE.STEP_RMSFILTER, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SRF.METHOD: _SRF.METHOD_BEST, + _SRF.THRESHOLD: 1, + }, + }, + } + + rf_step = StepRMSFilter(**step_conf) + rf_step.get_compounds().append(Compound(compound_number=1)) + rf_step.get_compounds()[0].add_enumeration(Enumeration(), auto_update=True) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + rf_step.data.compounds[0][0].add_conformers(conformers, auto_update=True) + + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 11) + rf_step.execute() + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 4) + + self.assertListEqual( + list( + float( + rf_step.get_compounds()[0][0] + .get_conformers()[i] + .get_molecule() + .GetProp("E_cosmo") + ) + for i in 
range(4) + ), + [-943306.7731, -943304.5487, -943301.0009, -943304.0485], + ) diff --git a/tests/rmsd/__init__.py b/tests/rmsd/__init__.py new file mode 100644 index 0000000..96049d9 --- /dev/null +++ b/tests/rmsd/__init__.py @@ -0,0 +1 @@ +from tests.rmsd.test_rmsd import Test_RMSD diff --git a/tests/rmsd/test_rmsd.py b/tests/rmsd/test_rmsd.py new file mode 100644 index 0000000..f11d2db --- /dev/null +++ b/tests/rmsd/test_rmsd.py @@ -0,0 +1,112 @@ +import unittest +from copy import deepcopy +from typing import List + +from rdkit.Geometry.rdGeometry import Point3D + +from icolos.core.containers.compound import Compound, Enumeration, unroll_conformers +from icolos.core.workflow_steps.calculation.rmsd import StepRMSD + +from icolos.utils.enums.step_enums import ( + StepBaseEnum, + StepRMSDEnum, + StepDataManipulationEnum, +) + +from tests.tests_paths import PATHS_EXAMPLEDATA, get_mol_as_Conformer + +_SBE = StepBaseEnum +_SR = StepRMSDEnum() +_SDM = StepDataManipulationEnum() + + +def _match_as_generic( + comp_list_1: List[Compound], comp_list_2: List[Compound] +) -> List[Compound]: + comp2_conf_unrolled = unroll_conformers(comp_list_2) + + # attach the second version of the conformers as generic field to the "real" input + # (as would be done by the data manipulator) + for comp in comp_list_1: + for enum in comp: + for conf in enum: + conf.add_extra_data( + key=_SDM.KEY_MATCHED, + data=[ + c + for c in comp2_conf_unrolled + if conf.get_index_string() == c.get_index_string() + ], + ) + return comp_list_1 + + +class Test_RMSD(unittest.TestCase): + @classmethod + def setUpClass(cls): + pass + + def setUp(self): + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + + # Compound 1 with 1 enumeration and 11 conformers + self.comp1 = Compound(compound_number=1) + self.comp1.add_enumeration(Enumeration(), auto_update=True) + self.comp1[0].add_conformers(deepcopy(conformers), auto_update=True) + + # Compound 2 with 1 enumeration and 11 conformers, change of some coordinates + self.comp2 = Compound(compound_number=1) + self.comp2.add_enumeration(Enumeration(), auto_update=True) + self.comp2[0].add_conformers(deepcopy(conformers), auto_update=True) + self.comp2[0][1].get_molecule().GetConformer().SetAtomPosition( + 0, Point3D(-4.2239, -0.441, 0.2458) + ) + self.comp2[0][7].get_molecule().GetConformer().SetAtomPosition( + 0, Point3D(-1.5442, -0.7854, 0.5883) + ) + + @classmethod + def tearDownClass(cls): + pass + + def test_RMSD_conformers_matched(self): + step_conf = { + _SBE.STEPID: "01_RMSD", + _SBE.STEP_TYPE: _SBE.STEP_RMSD, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {_SR.METHOD: _SR.METHOD_ALIGNMOL}, + }, + } + + rf_step = StepRMSD(**step_conf) + rf_step.get_compounds().append(_match_as_generic([self.comp1], [self.comp2])[0]) + self.assertEqual(len(rf_step.get_compounds()[0][0][0].get_extra_data()), 1) + + rf_step.execute() + + self.assertEqual( + rf_step.get_compounds()[0][0][1].get_molecule().GetProp(_SR.RMSD_TAG), + "0.002", + ) + self.assertEqual( + rf_step.get_compounds()[0][0][1] + .get_extra_data()[_SDM.KEY_MATCHED][0] + .get_molecule() + .GetProp(_SR.RMSD_TAG), + "0.002", + ) + self.assertEqual( + rf_step.get_compounds()[0][0][3].get_molecule().GetProp(_SR.RMSD_TAG), "0.0" + ) + self.assertEqual( + rf_step.get_compounds()[0][0][3] + .get_extra_data()[_SDM.KEY_MATCHED][0] + .get_molecule() + .GetProp(_SR.RMSD_TAG), + "0.0", + ) + 
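# the RMSD step only annotates conformers with a tag; the conformer count itself must be unchanged +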
self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 11) diff --git a/tests/schrodinger/__init__.py b/tests/schrodinger/__init__.py new file mode 100644 index 0000000..8ac707f --- /dev/null +++ b/tests/schrodinger/__init__.py @@ -0,0 +1,10 @@ +from tests.schrodinger.test_ligprep import * +from tests.schrodinger.test_glide import * +from tests.schrodinger.test_macromodel import * +from tests.schrodinger.test_fep_plus_setup import * +from tests.schrodinger.test_fep_plus_execution import * +from tests.schrodinger.test_fep_analysis import * +from tests.schrodinger.test_prepwizard import * +from tests.schrodinger.test_prime import * +from tests.schrodinger.test_desmond_production import * +from tests.schrodinger.test_desmond_setup import * diff --git a/tests/schrodinger/test_desmond_production.py b/tests/schrodinger/test_desmond_production.py new file mode 100644 index 0000000..445c1df --- /dev/null +++ b/tests/schrodinger/test_desmond_production.py @@ -0,0 +1,48 @@ +from icolos.core.containers.generic import GenericData +import unittest +from icolos.core.workflow_steps.schrodinger.desmond_exec import StepDesmondExec +from icolos.utils.general.files_paths import attach_root_path +import os +from tests.tests_paths import PATHS_EXAMPLEDATA + + +from icolos.utils.enums.step_enums import StepBaseEnum, StepDesmondEnum + +_SBE = StepBaseEnum +_SDE = StepDesmondEnum() + + +class Test_Desmond_Exec(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/schrodinger") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.DESMOND_SETUP_PDB), "rb") as f: + self.pdb = f.read() + + def test_desmond_production(self): + step_conf = { + _SBE.STEPID: "test_desmond_setup", + _SBE.STEP_TYPE: "desmond_preprocess", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws && $SCHRODINGER/jsc local-server-start" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: {}, + _SBE.SETTINGS_ADDITIONAL: {_SDE.CFG_FIELDS: {"time": "1"}}, + }, + } + + step_desmond_exec = StepDesmondExec(**step_conf) + step_desmond_exec.data.generic.add_file( + GenericData(file_name="structure.pdb", file_data=self.pdb, argument=True) + ) + step_desmond_exec.execute() + + out_path = os.path.join(self._test_dir, "out.cms") + step_desmond_exec.data.generic.write_out_all_files(self._test_dir) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 23587000) diff --git a/tests/schrodinger/test_desmond_setup.py b/tests/schrodinger/test_desmond_setup.py new file mode 100644 index 0000000..86d54a9 --- /dev/null +++ b/tests/schrodinger/test_desmond_setup.py @@ -0,0 +1,48 @@ +from icolos.core.containers.generic import GenericData +import unittest +from icolos.core.workflow_steps.schrodinger.desmond_preprocessor import StepDesmondSetup +from icolos.utils.general.files_paths import attach_root_path +import os +from tests.tests_paths import PATHS_EXAMPLEDATA + + +from icolos.utils.enums.step_enums import StepBaseEnum, StepDesmondEnum + +_SBE = StepBaseEnum +_SDE = StepDesmondEnum() + + +class Test_Desmond_Setup(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/schrodinger") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.DESMOND_SETUP_PDB), "r") as f: + self.pdb = f.read() + + def test_desmond_preprocess(self): + step_conf = { + 
_SBE.STEPID: "test_desmond_setup", + _SBE.STEP_TYPE: "desmond_preprocess", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: {}, + _SBE.SETTINGS_ADDITIONAL: {_SDE.MSJ_FIELDS: {}}, + }, + } + + step_desmond_preprocess = StepDesmondSetup(**step_conf) + step_desmond_preprocess.data.generic.add_file( + GenericData(file_name="structure.pdb", file_data=self.pdb, argument=True) + ) + step_desmond_preprocess.execute() + + out_path = os.path.join(self._test_dir, "setup.cms") + step_desmond_preprocess.data.generic.write_out_all_files(self._test_dir) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 22560500) diff --git a/tests/schrodinger/test_fep_analysis.py b/tests/schrodinger/test_fep_analysis.py new file mode 100644 index 0000000..92ae420 --- /dev/null +++ b/tests/schrodinger/test_fep_analysis.py @@ -0,0 +1,105 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.core.workflow_steps.schrodinger.fep_analysis import StepFepPlusAnalysis +from icolos.utils.enums.step_enums import StepBaseEnum, StepFepPlusEnum, StepGlideEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + get_ligands_as_compounds_with_conformers, + PATHS_1UYD, +) +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SFE = StepFepPlusEnum() +_SGE = StepGlideEnum() + + +class Test_FepPlusAnalysis(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/fep_plus") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.FEP_PLUS_MULTISIM_LONG), "r") as f: + self.log = f.read() + + self.mol = get_ligands_as_compounds_with_conformers( + attach_root_path(PATHS_1UYD.LIG_SDF) + ) + + def test_fep_analysis(self): + step_conf = { + _SBE.STEPID: "test_fep_analysis", + _SBE.STEP_TYPE: "fep_analysis", + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2020-4"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: {}, + _SBE.SETTINGS_ADDITIONAL: {_SFE.REFERENCE_DG: -10.76}, + }, + } + + step_fep_analysis = StepFepPlusAnalysis(**step_conf) + step_fep_analysis.data.compounds = self.mol + step_fep_analysis.data.generic.add_file( + GenericData( + file_name="test_multisim.log", file_data=self.log, argument=True + ) + ) + step_fep_analysis.execute() + # now confirm that the values have been parsed out of the log file properly + # true_conf_energies = ['2.67+-0.48', '0.00+-0.40', '2.86+-0.42', '8.88+-0.52', '3.09+-0.41'] + true_conf_energies = [ + -10.76, + -6.72, + -8.87, + -7.1, + -7.36, + -9.18, + -10.38, + -9.2, + -5.73, + -7.91, + -9.16, + -7.38, + -7.44, + -1.92, + -6.78, + -6.35, + -2.54, + -7.17, + -6.89, + -8.32, + -8.21, + -6.92, + -6.28, + -7.03, + -8.23, + -11.38, + -9.14, + -7.35, + -7.21, + -7.39, + -1.48, + -8.02, + -7.14, + -6.3, + -7.59, + -9.79, + -6.84, + -7.1, + ] + conformer_energies = [] + for compound in step_fep_analysis.data.compounds: + conformer_energies.append( + compound.get_enumerations()[0] + .get_conformers()[0] + .get_molecule() + .GetProp("map_dG") + ) + for idx, energy in enumerate(conformer_energies): + self.assertAlmostEqual( + float(energy.split("+-")[0]), true_conf_energies[idx], 2 + ) diff --git a/tests/schrodinger/test_fep_plus_execution.py b/tests/schrodinger/test_fep_plus_execution.py new file mode 100644 index 0000000..cd58eaa --- /dev/null +++ b/tests/schrodinger/test_fep_plus_execution.py 
@@ -0,0 +1,124 @@ +from icolos.core.containers.generic import GenericContainer, GenericData +import unittest +import os +from icolos.core.workflow_steps.schrodinger.fep_plus_execution import StepFepPlusExec +from icolos.utils.enums.step_enums import StepBaseEnum, StepFepPlusEnum, StepGlideEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + get_ligands_as_compounds_with_conformers, + PATHS_1UYD, +) +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SFE = StepFepPlusEnum() +_SGE = StepGlideEnum() + + +class Test_FepPlusExec(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/fep_plus") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.FEP_PLUS_EXAMPLE_FMP), "rb") as f: + self.fmp_in = f.read() + with open( + attach_root_path(PATHS_EXAMPLEDATA.FEP_PLUS_EXAMPLE_FMP_OUT), "rb" + ) as f: + self.fmp_out = f.read() + + with open(attach_root_path(PATHS_EXAMPLEDATA.FEP_PLUS_MULTISIM_LONG), "r") as f: + self.log = f.read() + with open(attach_root_path(PATHS_EXAMPLEDATA.FEP_PLUS_DOCKING_PV), "rb") as f: + self.poseviewer = f.read() + + # for this example we need five compounds, they can be empty containers + self.mol = get_ligands_as_compounds_with_conformers( + attach_root_path(PATHS_1UYD.LIG_SDF) + ) + + def test_fep_exec(self): + step_conf = { + _SBE.STEPID: "test_fep_setup", + _SBE.STEP_TYPE: "fep_setup", + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2020-4"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-WAIT", "-h"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _SFE.HOST_FLAG: "localhost", + _SFE.JOBNAME_FLAG: "test", + }, + }, + _SBE.SETTINGS_ADDITIONAL: {_SFE.REFERENCE_DG: -10.76}, + }, + } + + step_fep_execution = StepFepPlusExec(**step_conf) + step_fep_execution.data.compounds = self.mol + step_fep_execution.data.generic.add_file( + GenericData(file_name="out.fmp", file_data=self.fmp_in, argument=True) + ) + step_fep_execution._unit_test_simulate_output(self.log, self.fmp_out) + out_path = os.path.join(self._test_dir, "test_out.fmp") + step_fep_execution.write_generic_by_extension(self._test_dir, "fmp") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 316857) + + # now confirm that the values have been parsed out of the log file properly + # true_conf_energies = ['2.67+-0.48', '0.00+-0.40', '2.86+-0.42', '8.88+-0.52', '3.09+-0.41'] + true_conf_energies = [ + -10.76, + -6.72, + -8.87, + -7.1, + -7.36, + -9.18, + -10.38, + -9.2, + -5.73, + -7.91, + -9.16, + -7.38, + -7.44, + -1.92, + -6.78, + -6.35, + -2.54, + -7.17, + -6.89, + -8.32, + -8.21, + -6.92, + -6.28, + -7.03, + -8.23, + -11.38, + -9.14, + -7.35, + -7.21, + -7.39, + -1.48, + -8.02, + -7.14, + -6.3, + -7.59, + -9.79, + -6.84, + -7.1, + ] + conformer_energies = [] + for compound in step_fep_execution.data.compounds: + conformer_energies.append( + compound.get_enumerations()[0] + .get_conformers()[0] + .get_molecule() + .GetProp("map_dG") + ) + for idx, energy in enumerate(conformer_energies): + self.assertAlmostEqual( + float(energy.split("+-")[0]), true_conf_energies[idx], 2 + ) diff --git a/tests/schrodinger/test_fep_plus_setup.py b/tests/schrodinger/test_fep_plus_setup.py new file mode 100644 index 0000000..8cad54d --- /dev/null +++ b/tests/schrodinger/test_fep_plus_setup.py @@ -0,0 +1,92 @@ +import unittest +import os +from icolos.core.workflow_steps.schrodinger.fep_plus_setup 
import StepFepPlusSetup +from icolos.utils.enums.step_enums import StepBaseEnum, StepGlideEnum, StepFepPlusEnum +from tests.tests_paths import PATHS_1UYD +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + get_docked_ligands_as_conformers, + get_ligands_as_compounds_with_conformers, +) +from icolos.utils.general.files_paths import attach_root_path, empty_output_dir + +_SBE = StepBaseEnum +_SGE = StepGlideEnum() +_SFE = StepFepPlusEnum() + + +class Test_FepPlusSetup(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/fep_plus") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(PATHS_EXAMPLEDATA.FEP_PLUS_DOCKING_PV, "rb") as f: + self.poseviewer = f.read() + self.mol1 = get_docked_ligands_as_conformers( + PATHS_1UYD.LIG4_POSES, poseviewer=self.poseviewer + ) + self.mol2 = get_ligands_as_compounds_with_conformers( + PATHS_1UYD.LIG_SDF, poseviewer=self.poseviewer + ) + empty_output_dir(self._test_dir) + + def test_fep_setup_with_xray(self): + step_conf = { + _SBE.STEPID: "test_fep_setup_with_xray", + _SBE.STEP_TYPE: _SBE.STEP_FEP_PLUS_SETUP, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SFE.XRAY_STRUCTURES: PATHS_1UYD.XRAY_STRUCTURES + }, + }, + } + step_fep_plus_setup = StepFepPlusSetup(**step_conf) + step_fep_plus_setup.data.compounds = self.mol2 + step_fep_plus_setup.execute() + + # now confirm that the map has been generated properly + out_path = os.path.join(self._test_dir, "xray_test_out.fmp") + step_fep_plus_setup.write_generic_by_extension( + path=os.path.join(self._test_dir, "xray_test_out.fmp"), + ext="fmp", + join=False, + ) + stat_inf = os.stat(out_path) + self.assertAlmostEqual(stat_inf.st_size, 821966, delta=500) + + def test_fep_setup(self): + step_conf = { + _SBE.STEPID: "test_fep_setup", + _SBE.STEP_TYPE: _SBE.STEP_FEP_PLUS_SETUP, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + } + }, + } + + step_fep_plus_setup = StepFepPlusSetup(**step_conf) + step_fep_plus_setup.data.compounds = self.mol1 + step_fep_plus_setup.execute() + + # now confirm that the map has been generated properly + out_path = os.path.join(self._test_dir, "test_out.fmp") + step_fep_plus_setup.write_generic_by_extension( + path=os.path.join(self._test_dir, "test_out.fmp"), ext="fmp", join=False + ) + stat_inf = os.stat(out_path) + self.assertAlmostEqual(stat_inf.st_size, 848697, delta=500) diff --git a/tests/schrodinger/test_glide.py b/tests/schrodinger/test_glide.py new file mode 100644 index 0000000..acf99b6 --- /dev/null +++ b/tests/schrodinger/test_glide.py @@ -0,0 +1,511 @@ +import os +import time +import unittest + +from icolos.core.workflow_steps.schrodinger.glide import StepGlide + +from icolos.utils.enums.step_enums import StepBaseEnum, TokenGuardEnum, StepGlideEnum +from icolos.utils.enums.program_parameters import GlideEnum + +from tests.tests_paths import ( + PATHS_1UYD, + PATHS_EXAMPLEDATA, + get_1UYD_ligands_as_Compounds, +) +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SGE = StepGlideEnum() +_EE = GlideEnum() +_TE = TokenGuardEnum() + + +class Test_Glide(unittest.TestCase): + 
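# the tests below cover single-core, multi-core, maestro-in-file (constraints), ensemble-docking and poseviewer Glide configurations +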
@classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/Glide") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + self._1UYD_compounds = get_1UYD_ligands_as_Compounds( + abs_path=PATHS_1UYD.LIGANDS + ) + self.receptor_path = PATHS_1UYD.GRID_PATH + self.receptor_constraints_path = PATHS_1UYD.GRID_CONSTRAINTS_PATH + self.receptor_path_COX2 = PATHS_EXAMPLEDATA.PRIME_COX2_GRID + + @classmethod + def tearDownClass(cls): + pass + + def test_Glide_run(self): + step_conf = { + _SBE.STEPID: "01_glide", + _SBE.STEP_TYPE: _SBE.STEP_GLIDE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 4, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 2, + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {_EE.GLIDE_HOST: "cpu-only"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _EE.GLIDE_AMIDE_MODE: "trans", + _EE.GLIDE_EXPANDED_SAMPLING: "True", + _EE.GLIDE_GRIDFILE: [self.receptor_path], + _EE.GLIDE_NENHANCED_SAMPLING: "1", + _EE.GLIDE_POSE_OUTTYPE: _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB, + _EE.GLIDE_POSES_PER_LIG: "3", + _EE.GLIDE_POSTDOCK_NPOSE: "25", + _EE.GLIDE_POSTDOCKSTRAIN: "True", + _EE.GLIDE_PRECISION: "SP", + _EE.GLIDE_REWARD_INTRA_HBONDS: "True", + } + }, + }, + } + + glide_step = StepGlide(**step_conf) + glide_step.data.compounds = self._1UYD_compounds + + glide_step.execute() + self.assertEqual(len(glide_step.get_compounds()), 15) + self.assertEqual(len(glide_step.get_compounds()[0][0].get_conformers()), 3) + self.assertListEqual( + list( + glide_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-1.5198, 11.3439, 24.0245], + ) + + self.assertListEqual( + list( + glide_step.get_compounds()[14][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.1655, 12.4809, 24.137], + ) + self.assertEqual( + glide_step.get_compounds()[0][0][0] + .get_molecule() + .GetProp(_SBE.ANNOTATION_TAG_DOCKING_SCORE), + "-8.4349", + ) + self.assertEqual( + glide_step.get_compounds()[0][0][1] + .get_molecule() + .GetProp(_SBE.ANNOTATION_TAG_DOCKING_SCORE), + "-7.83118", + ) + self.assertEqual( + glide_step.get_compounds()[0][0][2] + .get_molecule() + .GetProp(_SBE.ANNOTATION_TAG_DOCKING_SCORE), + "-6.0089", + ) + + # check SDF write-out + out_path = os.path.join(self._test_dir, "glide_docked.sdf") + glide_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 209000) + + def test_Glide_run_parallelization_1core_singleton(self): + step_conf = { + _SBE.STEPID: "01_glide", + _SBE.STEP_TYPE: _SBE.STEP_GLIDE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 1, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {_EE.GLIDE_HOST: "cpu-only"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _EE.GLIDE_AMIDE_MODE: "trans", + _EE.GLIDE_EXPANDED_SAMPLING: "True", + _EE.GLIDE_GRIDFILE: [self.receptor_path], + _EE.GLIDE_NENHANCED_SAMPLING: "1", + _EE.GLIDE_POSE_OUTTYPE: _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB, + 
_EE.GLIDE_POSES_PER_LIG: "3", + _EE.GLIDE_POSTDOCK_NPOSE: "25", + _EE.GLIDE_POSTDOCKSTRAIN: "True", + _EE.GLIDE_PRECISION: "SP", + _EE.GLIDE_REWARD_INTRA_HBONDS: "True", + } + }, + }, + } + + compounds = self._1UYD_compounds[:3] + + glide_step = StepGlide(**step_conf) + glide_step.data.compounds = compounds + + # execute on one core and put all in one list + time_difference = time.time() + glide_step.execute() + time_difference = time.time() - time_difference + self.assertGreater(time_difference, 100) + self.assertGreater(325, time_difference) + self.assertEqual(len(glide_step.get_compounds()), 3) + self.assertEqual(len(glide_step.get_compounds()[0][0].get_conformers()), 3) + self.assertListEqual( + list( + glide_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-1.5198, 11.3439, 24.0245], + ) + + # check SDF write-out + out_path = os.path.join( + self._test_dir, "glide_docked_single_core_singleton_list.sdf" + ) + glide_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 50500) + + def test_Glide_run_1_core(self): + step_conf = { + _SBE.STEPID: "01_glide", + _SBE.STEP_TYPE: _SBE.STEP_GLIDE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 1}, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {_EE.GLIDE_HOST: "cpu-only"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _EE.GLIDE_AMIDE_MODE: "trans", + _EE.GLIDE_EXPANDED_SAMPLING: "True", + _EE.GLIDE_GRIDFILE: [self.receptor_path], + _EE.GLIDE_NENHANCED_SAMPLING: "1", + _EE.GLIDE_POSE_OUTTYPE: _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB, + _EE.GLIDE_POSES_PER_LIG: "3", + _EE.GLIDE_POSTDOCK_NPOSE: "25", + _EE.GLIDE_POSTDOCKSTRAIN: "True", + _EE.GLIDE_PRECISION: "SP", + _EE.GLIDE_REWARD_INTRA_HBONDS: "True", + } + }, + }, + } + + compounds = self._1UYD_compounds[:3] + + glide_step = StepGlide(**step_conf) + glide_step.data.compounds = compounds + + # execute on one core and put all in one list + time_difference = time.time() + glide_step.execute() + time_difference = time.time() - time_difference + self.assertGreater(325, time_difference) + self.assertEqual(len(glide_step.get_compounds()), 3) + self.assertEqual(len(glide_step.get_compounds()[0][0].get_conformers()), 3) + self.assertListEqual( + list( + glide_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-1.5198, 11.3439, 24.0245], + ) + + # check SDF write-out + out_path = os.path.join( + self._test_dir, "glide_docked_merged_list_3compounds.sdf" + ) + glide_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 50500) + + def test_Glide_run_parallelization_4cores(self): + step_conf = { + _SBE.STEPID: "01_glide", + _SBE.STEP_TYPE: _SBE.STEP_GLIDE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 4}, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {_EE.GLIDE_HOST: "cpu-only"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _EE.GLIDE_AMIDE_MODE: "trans", + _EE.GLIDE_EXPANDED_SAMPLING: "True", + _EE.GLIDE_GRIDFILE: 
[self.receptor_path], + _EE.GLIDE_NENHANCED_SAMPLING: "1", + _EE.GLIDE_POSE_OUTTYPE: _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB, + _EE.GLIDE_POSES_PER_LIG: "3", + _EE.GLIDE_POSTDOCK_NPOSE: "25", + _EE.GLIDE_POSTDOCKSTRAIN: "True", + _EE.GLIDE_PRECISION: "SP", + _EE.GLIDE_REWARD_INTRA_HBONDS: "True", + } + }, + }, + } + + compounds = self._1UYD_compounds[:3] + + glide_step = StepGlide(**step_conf) + glide_step.data.compounds = compounds + + # execute and put all in one list + time_difference = time.time() + glide_step.execute() + time_difference = time.time() - time_difference + self.assertGreater(150, time_difference) + self.assertEqual(len(glide_step.get_compounds()), 3) + self.assertEqual(len(glide_step.get_compounds()[0][0].get_conformers()), 3) + self.assertListEqual( + list( + glide_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-1.5198, 11.3439, 24.0245], + ) + + # check SDF write-out + out_path = os.path.join( + self._test_dir, "glide_docked_parallelized_3compounds.sdf" + ) + glide_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 50500) + + def test_Glide_run_parallelization_4cores_in_file_usage(self): + step_conf = { + _SBE.STEPID: "01_glide", + _SBE.STEP_TYPE: _SBE.STEP_GLIDE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 4}, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {_EE.GLIDE_HOST: "cpu-only"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _EE.GLIDE_AMIDE_MODE: "trans", + _EE.GLIDE_EXPANDED_SAMPLING: "True", + _EE.GLIDE_GRIDFILE: [self.receptor_constraints_path], + _EE.GLIDE_NENHANCED_SAMPLING: "1", + _EE.GLIDE_POSE_OUTTYPE: _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB, + _EE.GLIDE_POSES_PER_LIG: "3", + _EE.GLIDE_POSTDOCK_NPOSE: "25", + _EE.GLIDE_POSTDOCKSTRAIN: "True", + _EE.GLIDE_PRECISION: "SP", + _EE.GLIDE_REWARD_INTRA_HBONDS: "True", + }, + _SGE.MAESTRO_IN_FILE: { + _SGE.MAESTRO_IN_FILE_PATH: PATHS_EXAMPLEDATA.GLIDE_EXAMPLE_IN + }, + }, + }, + } + + compounds = self._1UYD_compounds[:3] + + glide_step = StepGlide(**step_conf) + glide_step.data.compounds = compounds + glide_step.execute() + + # execute on one core and put all in one list + self.assertEqual(len(glide_step.get_compounds()), 3) + self.assertEqual(len(glide_step.get_compounds()[0][0].get_conformers()), 3) + + # would be [-2.5618, 10.8202, 25.2644] without constraints + self.assertListEqual( + list( + glide_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [3.1229, 4.5141, 24.8603], + ) + + # check SDF write-out + out_path = os.path.join( + self._test_dir, "glide_docked_parallelized_3compounds.sdf" + ) + glide_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 50500) + + def test_Glide_run_parallelization_4cores_ensemble_docking(self): + step_conf = { + _SBE.STEPID: "01_glide", + _SBE.STEP_TYPE: _SBE.STEP_GLIDE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 4}, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {_EE.GLIDE_HOST: "cpu-only"}, 
+ }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _EE.GLIDE_AMIDE_MODE: "trans", + _EE.GLIDE_EXPANDED_SAMPLING: "True", + _EE.GLIDE_GRIDFILE: [ + self.receptor_path_COX2, + self.receptor_path, + ], + _EE.GLIDE_NENHANCED_SAMPLING: "1", + _EE.GLIDE_POSE_OUTTYPE: _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB, + _EE.GLIDE_POSES_PER_LIG: "3", + _EE.GLIDE_POSTDOCK_NPOSE: "25", + _EE.GLIDE_POSTDOCKSTRAIN: "True", + _EE.GLIDE_PRECISION: "SP", + _EE.GLIDE_REWARD_INTRA_HBONDS: "True", + }, + _SBE.GRID_IDS: ["mygrid1", "mygrid2"], + }, + }, + } + + compounds = self._1UYD_compounds[:3] + + glide_step = StepGlide(**step_conf) + glide_step.data.compounds = compounds + + # execute on one core and put all in one list + glide_step.execute() + self.assertEqual(len(glide_step.get_compounds()), 3) + self.assertEqual(len(glide_step.get_compounds()[0][0].get_conformers()), 6) + self.assertListEqual( + list( + glide_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-1.5198, 11.3439, 24.0245], + ) + self.assertListEqual( + list( + glide_step.get_compounds()[0][0][5] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [7.3776, 55.7005, 70.3807], + ) + self.assertListEqual( + ["mygrid2", "mygrid2", "mygrid1", "mygrid2", "mygrid1", "mygrid1"], + [ + comp.get_molecule().GetProp(_SBE.ANNOTATION_GRID_ID) + for comp in list(glide_step.get_compounds()[0][0]) + ], + ) + + # check SDF write-out + out_path = os.path.join( + self._test_dir, "glide_docked_parallelized_ensemble_docking.sdf" + ) + glide_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 80000) + + def test_Glide_run_parallelization_poseviewer(self): + step_conf = { + _SBE.STEPID: "01_glide", + _SBE.STEP_TYPE: _SBE.STEP_GLIDE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 4}, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {_EE.GLIDE_HOST: "cpu-only"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _EE.GLIDE_AMIDE_MODE: "trans", + _EE.GLIDE_EXPANDED_SAMPLING: "True", + _EE.GLIDE_GRIDFILE: [self.receptor_path], + _EE.GLIDE_NENHANCED_SAMPLING: "1", + _EE.GLIDE_POSE_OUTTYPE: _EE.GLIDE_POSE_OUTTYPE_POSEVIEWER, + _EE.GLIDE_POSES_PER_LIG: "3", + _EE.GLIDE_POSTDOCK_NPOSE: "25", + _EE.GLIDE_POSTDOCKSTRAIN: "True", + _EE.GLIDE_PRECISION: "SP", + _EE.GLIDE_REWARD_INTRA_HBONDS: "True", + } + }, + }, + } + + compounds = self._1UYD_compounds[:3] + + glide_step = StepGlide(**step_conf) + glide_step.data.compounds = compounds + + # execute on one core and put all in one list + glide_step.execute() + + self.assertEqual(len(glide_step.get_compounds()), 3) + self.assertEqual(len(glide_step.get_compounds()[0][0].get_conformers()), 3) + self.assertListEqual( + list( + glide_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-1.5198, 11.3439, 24.0245], + ) + + # check SDF write-out + out_path = os.path.join( + self._test_dir, "glide_docked_parallelized_3compounds_pv.sdf" + ) + glide_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 50500) diff --git a/tests/schrodinger/test_ligprep.py b/tests/schrodinger/test_ligprep.py new file mode 100644 index 0000000..c8c7ef3 --- /dev/null +++ b/tests/schrodinger/test_ligprep.py @@ -0,0 
+1,220 @@ +import unittest + +from icolos.core.workflow_steps.schrodinger.ligprep import StepLigprep +from icolos.utils.enums.step_enums import StepBaseEnum, TokenGuardEnum, StepLigprepEnum +from icolos.utils.enums.program_parameters import LigprepEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + get_mol_as_Compound, + get_test_Compounds_without_molecules, +) +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_LSBE = StepLigprepEnum() +_LIE = LigprepEnum() +_TE = TokenGuardEnum() + + +class Test_Ligprep(unittest.TestCase): + @classmethod + def setUpClass(cls): + pass + + def setUp(self): + self._paracetamol_molecule = get_mol_as_Compound( + attach_root_path(PATHS_EXAMPLEDATA.PARACETAMOL_PATH), compound_number=0 + ) + self._aspirin_molecule = get_mol_as_Compound( + attach_root_path(PATHS_EXAMPLEDATA.ASPIRIN_PATH), compound_number=1 + ) + self._Aspirin = get_test_Compounds_without_molecules(compound_numbers=[2])[ + "Aspirin" + ] + + @classmethod + def tearDownClass(cls): + pass + + def test_LigPrep_run(self): + step_conf = { + _SBE.STEPID: "01_ligprep", + _SBE.STEP_TYPE: _SBE.STEP_LIGPREP, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2020-4", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 2, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 2, + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 2}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _LIE.LIGPREP_F: "/a/path/to/be/ignored/filter.txt" + } + } + }, + } + + ligprep_step = StepLigprep(**step_conf) + ligprep_step.data.compounds = [ + self._paracetamol_molecule, + self._aspirin_molecule, + self._Aspirin, + ] + + ligprep_step.execute() + self.assertEqual( + ["0:0", "1:0", "2:0"], + [ + enum.get_index_string() + for comp in ligprep_step.get_compounds() + for enum in comp + ], + ) + self.assertEqual( + [ + "[H]c1c([H])c(Cl)c2c(=O)nc(N([H])C(=O)C([H])([H])[H])sc2c1[H]", + "[H]c1c([H])c(Cl)c2c(=O)nc(C(=O)[O-])sc2c1[H]", + "O=C(C)Oc1ccccc1C(=O)O", + ], + [ + enum.get_original_smile() + for comp in ligprep_step.get_compounds() + for enum in comp + ], + ) + self.assertEqual( + [ + "[H]c1c([H])c(Cl)c2c(=O)nc(N([H])C(=O)C([H])([H])[H])sc2c1[H]", + "[H]OC(=O)c1nc(=O)c2c(Cl)c([H])c([H])c([H])c2s1", + "[H]OC(=O)c1c([H])c([H])c([H])c([H])c1OC(=O)C([H])([H])[H]", + ], + [ + enum.get_smile() + for comp in ligprep_step.get_compounds() + for enum in comp + ], + ) + self.assertListEqual( + list( + ligprep_step.get_compounds()[0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-4.9037, 3.0725, 2.0034], + ) + self.assertListEqual( + list( + ligprep_step.get_compounds()[1][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-4.8794, 3.0688, -2.0104], + ) + self.assertListEqual( + list( + ligprep_step.get_compounds()[2][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.0243, 2.4719, -0.3164], + ) + + def test_LigPrep_run_EPIK_stereo_filtering(self): + step_conf = { + _SBE.STEPID: "01_ligprep", + _SBE.STEP_TYPE: _SBE.STEP_LIGPREP, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2020-4", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 2, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 2, + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 2}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [_LIE.LIGPREP_EPIK], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _LIE.LIGPREP_PH: 7.0, + 
_LIE.LIGPREP_PHT: 2.0, + _LIE.LIGPREP_S: 10, + _LIE.LIGPREP_BFF: 14, + }, + }, + _SBE.SETTINGS_ADDITIONAL: {_LSBE.FILTER_FILE: {"Total_charge": "!= 0"}}, + }, + } + + ligprep_step = StepLigprep(**step_conf) + ligprep_step.data.compounds = [ + self._paracetamol_molecule, + self._aspirin_molecule, + self._Aspirin, + ] + + ligprep_step.execute() + self.assertEqual( + ["0:0", "0:1", "1:0"], + [ + enum.get_index_string() + for comp in ligprep_step.get_compounds() + for enum in comp + ], + ) + self.assertEqual( + [ + "[H]c1c([H])c(Cl)c2c(=O)nc(N([H])C(=O)C([H])([H])[H])sc2c1[H]", + "[H]c1c([H])c(Cl)c2c(=O)nc(N([H])C(=O)C([H])([H])[H])sc2c1[H]", + "[H]c1c([H])c(Cl)c2c(=O)nc(C(=O)[O-])sc2c1[H]", + ], + [ + enum.get_original_smile() + for comp in ligprep_step.get_compounds() + for enum in comp + ], + ) + self.assertEqual( + [ + "[H]c1c([H])c(Cl)c2c(=O)n([H])/c(=N\\C(=O)C([H])([H])[H])sc2c1[H]", + "[H]c1c([H])c(Cl)c2c(=O)nc(N([H])C(=O)C([H])([H])[H])sc2c1[H]", + "[H]c1c([H])c(Cl)c2c(=O)[n+]([H])c(C(=O)[O-])sc2c1[H]", + ], + [ + enum.get_smile() + for comp in ligprep_step.get_compounds() + for enum in comp + ], + ) + self.assertListEqual( + list( + ligprep_step.get_compounds()[0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-4.7828, 5.0389, -2.1622], + ) + self.assertListEqual( + list( + ligprep_step.get_compounds()[0][1] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-4.9037, 3.0725, 2.0034], + ) + self.assertListEqual( + list( + ligprep_step.get_compounds()[1][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-5.2155, 3.215, -1.1152], + ) diff --git a/tests/schrodinger/test_macromodel.py b/tests/schrodinger/test_macromodel.py new file mode 100644 index 0000000..d9f358b --- /dev/null +++ b/tests/schrodinger/test_macromodel.py @@ -0,0 +1,93 @@ +import unittest +import os + +from icolos.core.workflow_steps.schrodinger.macromodel import StepMacromodel + +from icolos.utils.enums.step_enums import StepBaseEnum, TokenGuardEnum +from icolos.utils.enums.program_parameters import MacromodelEnum + +from tests.tests_paths import PATHS_EXAMPLEDATA, get_mol_as_Compound +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_CE = MacromodelEnum() +_TE = TokenGuardEnum() + + +class Test_Macromodel_confgen(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/MacroModel") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + self._paracetamol_molecule = get_mol_as_Compound( + attach_root_path(PATHS_EXAMPLEDATA.PARACETAMOL_PATH) + ) + self._aspirin_molecule = get_mol_as_Compound( + attach_root_path(PATHS_EXAMPLEDATA.ASPIRIN_PATH) + ) + + @classmethod + def tearDownClass(cls): + pass + + def test_MacroModel_run(self): + step_conf = { + _SBE.STEPID: "01_macromodel", + _SBE.STEP_TYPE: _SBE.STEP_MACROMODEL, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2020-4"}, + _TE.TG: { + _TE.TG_PREFIX_EXECUTION: "module load schrodinger/2020-4", + _TE.TG_TOKEN_POOLS: {"MMOD_MACROMODEL": 2}, + _TE.TG_WAIT_INTERVAL_SECONDS: 30, + _TE.TG_WAIT_LIMIT_SECONDS: 900, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [_CE.MACROMODEL_WAIT], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {_CE.MACROMODEL_NJOBS: 2}, + } + }, + } + + mm_step = StepMacromodel(**step_conf) + mm_step.data.compounds = [self._paracetamol_molecule] + + # conformer coordinates should not be touched by the execution + self.assertListEqual( + 
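+            # note: [0][0] addresses the enumeration's input molecule here, not a
+            # generated conformer; MacroModel attaches its conformers separately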
list(
+                mm_step.get_compounds()[0][0]
+                .get_molecule()
+                .GetConformer(0)
+                .GetPositions()[0]
+            ),
+            [-3.8276, -1.0625, 0.3279],
+        )
+        mm_step.execute()
+        self.assertListEqual(
+            list(
+                mm_step.get_compounds()[0][0]
+                .get_molecule()
+                .GetConformer(0)
+                .GetPositions()[0]
+            ),
+            [-3.8276, -1.0625, 0.3279],
+        )
+        self.assertEqual(len(mm_step.get_compounds()[0][0].get_conformers()), 10)
+        self.assertEqual(
+            list(
+                mm_step.get_compounds()[0][0][0]
+                .get_molecule()
+                .GetConformer(0)
+                .GetPositions()[0]
+            ),
+            [-4.2269, -0.441, 0.2359],
+        )
+
+        # check write-out
+        out_path = os.path.join(self._test_dir, "macromodel_output_file.sdf")
+        mm_step.write_conformers(out_path)
+        stat_inf = os.stat(out_path)
+        self.assertEqual(stat_inf.st_size, 25637)
diff --git a/tests/schrodinger/test_prepwizard.py b/tests/schrodinger/test_prepwizard.py
new file mode 100644
index 0000000..95511da
--- /dev/null
+++ b/tests/schrodinger/test_prepwizard.py
@@ -0,0 +1,118 @@
+import unittest
+import os
+
+from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum, StepPrepwizEnum
+from icolos.core.workflow_steps.schrodinger.prepwizard import StepPrepwizard
+from icolos.core.containers.generic import GenericData
+from icolos.utils.general.files_paths import attach_root_path
+from tests.tests_paths import (
+    PATHS_1UYD,
+    PATHS_EXAMPLEDATA,
+)
+
+_SGE = StepGromacsEnum()
+_SBE = StepBaseEnum
+_SPE = StepPrepwizEnum()
+
+
+class Test_Prepwizard(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls._test_dir = attach_root_path("tests/junk/prepwizard")
+        if not os.path.isdir(cls._test_dir):
+            os.makedirs(cls._test_dir)
+
+    def setUp(self):
+        with open(PATHS_1UYD.PDB_PATH, "r") as f:
+            data = f.read()
+        self.GenericData = GenericData(file_name="test_structure.pdb", file_data=data)
+        with open(PATHS_EXAMPLEDATA.DESMOND_SETUP_PDB, "r") as f:
+            self.cox = f.read()
+
+    def test_prepwizard(self):
+        step_conf = {
+            _SBE.STEPID: "01_prepwizard",
+            _SBE.STEP_TYPE: _SBE.STEP_PREPWIZARD,
+            _SBE.EXEC: {
+                _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2020-4",
+            },
+            _SBE.SETTINGS: {
+                _SBE.SETTINGS_ARGUMENTS: {_SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}}
+            },
+        }
+
+        prepwiz_step = StepPrepwizard(**step_conf)
+        prepwiz_step.data.generic.add_file(self.GenericData)
+        prepwiz_step.execute()
+
+        out_file = prepwiz_step.data.generic.get_files_by_extension("pdb")[0].get_data()
+        out_path = os.path.join(self._test_dir, "test_out.pdb")
+        with open(out_path, "w") as f:
+            f.write(out_file)
+        stat_inf = os.stat(out_path)
+        self.assertEqual(stat_inf.st_size, 53635)
+
+    def test_remove_ligand(self):
+        step_conf = {
+            _SBE.STEPID: "test_rem",
+            _SBE.STEP_TYPE: _SBE.STEP_PREPWIZARD,
+            _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "ml schrodinger"},
+            _SBE.SETTINGS: {
+                _SBE.SETTINGS_ADDITIONAL: {_SPE.REMOVE_RES: ["S58"]},
+            },
+        }
+
+        step_removelig = StepPrepwizard(**step_conf)
+        step_removelig.data.generic.add_file(
+            GenericData(file_name="cox.pdb", file_data=self.cox, argument=True)
+        )
+
+        step_removelig.execute()
+        out_path = os.path.join(self._test_dir, "cox.pdb")
+        step_removelig.write_generic_by_extension(
+            self._test_dir,
+            _SGE.PROTEIN_PDB,
+        )
+
+        out_file = step_removelig.data.generic.get_files_by_extension("pdb")[
+            0
+        ].get_data()
+        with open(out_path, "w") as f:
+            f.write(out_file)
+        stat_inf = os.stat(out_path)
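+        # S58 is the co-crystallised SC-558 inhibitor of the COX-2 structure used
+        # here (assumed to be PDB 1CX2, judging from the example file name)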
self.assertGreater(stat_inf.st_size, 738100) + + def test_auto_remove_ligand(self): + step_conf = { + _SBE.STEPID: "test_rem", + _SBE.STEP_TYPE: _SBE.STEP_PREPWIZARD, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: {_SPE.REMOVE_RES: "ligands"}, + }, + } + + step_removelig = StepPrepwizard(**step_conf) + step_removelig.data.generic.add_file( + GenericData(file_name="cox.pdb", file_data=self.cox, argument=True) + ) + + step_removelig.execute() + out_path = os.path.join(self._test_dir, "cox_auto.pdb") + step_removelig.write_generic_by_extension( + self._test_dir, + _SGE.PROTEIN_PDB, + ) + + out_file = step_removelig.data.generic.get_files_by_extension("pdb")[ + 0 + ].get_data() + with open(out_path, "w") as f: + f.write(out_file) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 724500) diff --git a/tests/schrodinger/test_prime.py b/tests/schrodinger/test_prime.py new file mode 100644 index 0000000..ef3a631 --- /dev/null +++ b/tests/schrodinger/test_prime.py @@ -0,0 +1,195 @@ +import unittest +import os + +from icolos.core.workflow_steps.schrodinger.prime import StepPrime + +from icolos.utils.enums.step_enums import StepBaseEnum, StepPrimeEnum, TokenGuardEnum +from icolos.utils.enums.program_parameters import PrimeEnum + +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + get_mol_as_Compound, + get_ligands_as_compounds_with_conformers, +) +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SPE = StepPrimeEnum() +_CE = PrimeEnum() +_TE = TokenGuardEnum() + + +class Test_Prime(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/prime_test") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.PRIME_POSEVIEWER), "rb") as f: + self._poseviewer = f.read() + self._molecule = get_mol_as_Compound( + attach_root_path(PATHS_EXAMPLEDATA.PRIME_DOCKED_LIGAND_SDF) + ) + self._conformers = get_ligands_as_compounds_with_conformers( + attach_root_path(PATHS_EXAMPLEDATA.LIGANDS_1UYD) + ) + + @classmethod + def tearDownClass(cls): + pass + + def test_Prime_run(self): + # TODO: make sure the original execution mode (on enumerations) works ok + # * Pull the molecule from the enumeration if no conformers attached + # * add conformer to the enum at the end + step_conf = { + _SBE.STEPID: "01_prime", + _SBE.STEP_TYPE: _SBE.STEP_PRIME, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 4, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 2, + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _TE.TG: { + _TE.TG_PREFIX_EXECUTION: "module load schrodinger/2021-2-js-aws", + _TE.TG_TOKEN_POOLS: {"PRIMEX_MAIN": 8}, + _TE.TG_WAIT_INTERVAL_SECONDS: 30, + _TE.TG_WAIT_LIMIT_SECONDS: 900, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-prime_opt": "OPLS_VERSION=OPLS3e", + "-HOST": "cpu-only", + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SPE.RECEPTOR: attach_root_path(PATHS_EXAMPLEDATA.RECEPTOR_1UYD) + }, + }, + } + + prime_step = StepPrime(**step_conf) + prime_step.data.compounds = [self._molecule] + prime_step.execute() + + self.assertEqual(len(prime_step.get_compounds()[0][0].get_conformers()), 1) + # molecule coordinates should not be touched by the 
execution (conformer is optimized though) + self.assertListEqual( + list( + prime_step.get_compounds()[0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [15.2886, 52.7, 69.7128], + ) + + # check write-out + out_path = os.path.join(self._test_dir, "prime_output_file.sdf") + prime_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 10000) + self.assertGreater(13500, stat_inf.st_size) + + def test_prime_run_conformers(self): + step_conf = { + _SBE.STEPID: "01_prime", + _SBE.STEP_TYPE: _SBE.STEP_PRIME, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 32, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _TE.TG: { + _TE.TG_PREFIX_EXECUTION: "module load schrodinger/2021-2-js-aws", + _TE.TG_TOKEN_POOLS: {"PRIMEX_MAIN": 8}, + _TE.TG_WAIT_INTERVAL_SECONDS: 30, + _TE.TG_WAIT_LIMIT_SECONDS: 900, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-prime_opt": "OPLS_VERSION=OPLS3e", + "-HOST": "cpu-only", + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SPE.RECEPTOR: attach_root_path(PATHS_EXAMPLEDATA.RECEPTOR_1UYD) + }, + }, + } + prime_step = StepPrime(**step_conf) + prime_step.data.compounds = self._conformers + prime_step.execute() + out_path = os.path.join(self._test_dir, "prime_conformers_output.sdf") + prime_step.write_conformers(out_path) + scores = [ + "-46.4912412523436", + "-49.5744863214668", + "-63.4520243626994", + "-55.2546247037599", + "-35.0457131568983", + "-37.584671678831", + "-52.3315306739823", + "-42.1457765778323", + "-39.0962071597705", + "-46.9267618228951", + "-41.4015029031088", + "-49.0027294452047", + "-45.297078493255", + "-47.1669750502297", + "-50.2110899116497", + "-38.8494636817877", + "-41.6326792228592", + "-43.6924482130898", + "-46.738882435201", + "-45.242419676907", + "-36.5693940219298", + "-57.9606138506851", + "-55.4918326231546", + "-39.724716804717", + "-50.0105377772616", + "-46.9162249942074", + "-46.2790546176639", + "-43.8232309398354", + "-49.7540870967205", + "-53.7133446915177", + "-51.6633994627191", + "-54.2858218610409", + "-42.9129639283819", + "-49.1980564160085", + "-52.7421500005312", + "-50.953927771995", + "-59.8079546364734", + "-53.20869108637", + "-42.9971732771755", + "-46.3393621442165", + "-39.1124509414121", + "-26.9291589283248", + "-48.0546634882376", + "-58.0973312599281", + "-52.8690868697358", + ] + flattened_conformers_scores = [] + for compound in prime_step.data.compounds: + for enumeration in compound.get_enumerations(): + for conformer in enumeration.get_conformers(): + flattened_conformers_scores.append( + conformer.get_molecule().GetProp(_SPE.MMGBSA_SCORE) + ) + # self.assertEqual(float(prime_step.get_compounds()[0].get_enumerations()[0].get_conformers()[0].get_molecule()\ + # .GetProp('r_psp_MMGBSA_dG_Bind')), -69.9651350867098) + + for trial, value in zip(flattened_conformers_scores, scores): + self.assertEqual(round(float(trial)), round(float(value))) diff --git a/tests/shaep/__init__.py b/tests/shaep/__init__.py new file mode 100644 index 0000000..4f7a309 --- /dev/null +++ b/tests/shaep/__init__.py @@ -0,0 +1 @@ +from tests.shaep.test_shaep import * diff --git a/tests/shaep/test_shaep.py b/tests/shaep/test_shaep.py new file mode 100644 index 0000000..fa63496 --- /dev/null +++ b/tests/shaep/test_shaep.py 
@@ -0,0 +1,72 @@ +from icolos.core.containers.generic import GenericData +from icolos.utils.enums.program_parameters import ShaepEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + get_mol_as_Compound, + get_mol_as_Conformer, + MAIN_CONFIG, +) +import unittest +import os + +from icolos.utils.enums.step_enums import StepBaseEnum, StepShaepEnum +from icolos.core.workflow_steps.calculation.shaep import StepShaep +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SSE = StepShaepEnum() +_SE = ShaepEnum() + + +class Test_Shaep(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/shaep") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + # TODO: update to load at least 3 compounds docked (at least 5 poses each) + mol = get_mol_as_Compound(PATHS_EXAMPLEDATA.SHAEP_LIGAND_DOCKED_POSE) + conf = get_mol_as_Conformer(PATHS_EXAMPLEDATA.SHAEP_LIGAND_DOCKED_POSE) + mol[0].add_conformers(conf, auto_update=True) + self.mol = mol + + with open(PATHS_EXAMPLEDATA.PANTHER_NEGATIVE_IMAGE, "r") as f: + self.negative_image = f.read() + + def test_shaep(self): + step_conf = { + _SBE.STEPID: "01_shaep", + _SBE.STEP_TYPE: _SBE.STEP_SHAEP, + _SBE.EXEC: {_SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["SHAEP_LOCATION"]}, + } + shaep_step = StepShaep(**step_conf) + shaep_step.data.compounds = [self.mol] + shaep_step.data.generic.add_file( + GenericData(file_name="neg_image.mol2", file_data=self.negative_image) + ) + shaep_step.execute() + + self.assertEqual( + float( + shaep_step.get_compounds()[0][0][0] + .get_molecule() + .GetProp(_SE.TAG_SHAPE_SIMILARITY) + ), + 0.737409, + ) + self.assertEqual( + float( + shaep_step.get_compounds()[0][0][0] + .get_molecule() + .GetProp(_SE.TAG_ESP_SIMILARITY) + ), + 0.106811, + ) + + # check, whether the tags got added + out_path = os.path.join(self._test_dir, "mols_nibr.sdf") + shaep_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 17358) diff --git a/tests/step_utils/__init__.py b/tests/step_utils/__init__.py new file mode 100644 index 0000000..053700f --- /dev/null +++ b/tests/step_utils/__init__.py @@ -0,0 +1,4 @@ +from tests.step_utils.test_input_merger import * +from tests.step_utils.test_input_preparator import * +from tests.step_utils.test_run_variables_resolver import * +from tests.step_utils.test_writeout import * diff --git a/tests/step_utils/test_input_merger.py b/tests/step_utils/test_input_merger.py new file mode 100644 index 0000000..7c33b4d --- /dev/null +++ b/tests/step_utils/test_input_merger.py @@ -0,0 +1,262 @@ +import unittest + +from icolos.core.step_utils.input_merger import InputMerger, StepMerge +from icolos.core.containers.compound import Conformer, Enumeration, Compound + +from icolos.utils.enums.step_enums import StepBaseEnum + +_SBE = StepBaseEnum + + +class Test_InputMerger(unittest.TestCase): + @classmethod + def setUpClass(cls): + pass + + def setUp(self): + # comp1 has 2 enumerations, one with 2 and one with 3 conformers + comp1 = Compound(name="test_molecule", compound_number=0) + comp1_enum1 = Enumeration(smile="abc", molecule=None, enumeration_id=1) + comp1_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True) + comp1_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True) + comp1_enum2 = Enumeration(smile="def", molecule=None, enumeration_id=2) + comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True) + 
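+        # note: auto_update=True appears to renumber the conformer ID sequentially
+        # and register the parent container, so repeated conformer_id=0 inputs
+        # still yield index strings like "0:1:0", "0:1:1" (compound:enum:conformer)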
comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp1.add_enumeration(comp1_enum1, auto_update=False)
+        comp1.add_enumeration(comp1_enum2, auto_update=False)
+
+        # comp2 has 3 enumerations, one with 2, one with 3 and one with 4 conformers
+        comp2 = Compound(name="test_molecule_new", compound_number=0)
+        comp2_enum1 = Enumeration(smile="kk", molecule=None, enumeration_id=0)
+        comp2_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum2 = Enumeration(smile="abc", molecule=None, enumeration_id=1)
+        comp2_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum3 = Enumeration(smile="xyz", molecule=None, enumeration_id=2)
+        comp2_enum3.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum3.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum3.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum3.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2.add_enumeration(comp2_enum1, auto_update=False)
+        comp2.add_enumeration(comp2_enum2, auto_update=False)
+        comp2.add_enumeration(comp2_enum3, auto_update=False)
+
+        # comp3 has 2 enumerations, one with 2 and one with 3 conformers
+        # (same name as comp1, but a different compound number)
+        comp3 = Compound(name="test_molecule", compound_number=1)
+        comp3_enum1 = Enumeration(smile="abc", molecule=None, enumeration_id=0)
+        comp3_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp3_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp3_enum2 = Enumeration(smile="def", molecule=None, enumeration_id=1)
+        comp3_enum2.add_conformer(Conformer(conformer_id=0), auto_update=False)
+        comp3_enum2.add_conformer(Conformer(conformer_id=0), auto_update=False)
+        comp3_enum2.add_conformer(Conformer(conformer_id=0), auto_update=False)
+        comp3.add_enumeration(comp3_enum1, auto_update=False)
+        comp3.add_enumeration(comp3_enum2, auto_update=False)
+        self.list_compounds = [comp1, comp2, comp3]
+
+    @classmethod
+    def tearDownClass(cls):
+        pass
+
+    def test_merging_by_name_compound(self):
+        conf = {
+            _SBE.INPUT_MERGE_COMPOUNDS: True,
+            _SBE.INPUT_MERGE_COMPOUNDS_BY: _SBE.INPUT_MERGE_BY_NAME,
+            _SBE.INPUT_MERGE_ENUMERATIONS: False,
+        }
+        conf = StepMerge(**conf)
+        merger = InputMerger(conf)
+        list_compounds = merger.merge(self.list_compounds)
+
+        self.assertEqual(len(list_compounds), 2)
+        self.assertEqual(len(list_compounds[0].get_enumerations()), 4)
+        self.assertEqual(len(list_compounds[1].get_enumerations()), 3)
+
+        self.assertListEqual(
+            [c.get_name() for c in list_compounds],
+            ["test_molecule", "test_molecule_new"],
+        )
+        self.assertListEqual(
+            [
+                conf.get_index_string()
+                for c in list_compounds
+                for e in c.get_enumerations()
+                for conf in e.get_conformers()
+            ],
+            [
+                "0:0:0",
+                "0:0:1",
+                "0:1:0",
+                "0:1:1",
+                "0:1:2",
+                "0:2:0",
+                "0:2:1",
+                "0:3:0",
+                "0:3:1",
+                "0:3:2",
+                "1:0:0",
+                "1:0:1",
+                "1:1:0",
+                "1:1:1",
+                "1:1:2",
+                "1:2:0",
+                "1:2:1",
+                "1:2:2",
+                "1:2:3",
+            ],
+        )
+
+    def test_merging_by_id_compound(self):
+        conf = {
+            _SBE.INPUT_MERGE_COMPOUNDS: True,
+            _SBE.INPUT_MERGE_COMPOUNDS_BY: _SBE.INPUT_MERGE_BY_ID,
+            _SBE.INPUT_MERGE_ENUMERATIONS: False,
+        }
+        conf = StepMerge(**conf)
+        merger = InputMerger(conf)
+        list_compounds =
merger.merge(self.list_compounds) + + self.assertEqual(len(list_compounds), 2) + self.assertEqual(len(list_compounds[0].get_enumerations()), 5) + self.assertEqual(len(list_compounds[1].get_enumerations()), 2) + + self.assertListEqual([c.get_name() for c in list_compounds], ["0", "1"]) + + self.assertListEqual( + [ + conf.get_index_string() + for c in list_compounds + for e in c.get_enumerations() + for conf in e.get_conformers() + ], + [ + "0:0:0", + "0:0:1", + "0:1:0", + "0:1:1", + "0:1:2", + "0:2:0", + "0:2:1", + "0:3:0", + "0:3:1", + "0:3:2", + "0:4:0", + "0:4:1", + "0:4:2", + "0:4:3", + "1:0:0", + "1:0:1", + "1:1:0", + "1:1:1", + "1:1:2", + ], + ) + + def test_merging_by_name_compound_enumeration_smile(self): + conf = { + _SBE.INPUT_MERGE_COMPOUNDS: True, + _SBE.INPUT_MERGE_COMPOUNDS_BY: _SBE.INPUT_MERGE_BY_NAME, + _SBE.INPUT_MERGE_ENUMERATIONS: True, + _SBE.INPUT_MERGE_ENUMERATIONS_BY: _SBE.INPUT_MERGE_BY_SMILE, + } + conf = StepMerge(**conf) + merger = InputMerger(conf) + list_compounds = merger.merge(self.list_compounds) + + self.assertEqual(len(list_compounds), 2) + self.assertEqual(len(list_compounds[0].get_enumerations()), 2) + self.assertEqual(len(list_compounds[1].get_enumerations()), 3) + + self.assertListEqual( + [c.get_name() for c in list_compounds], + ["test_molecule", "test_molecule_new"], + ) + self.assertListEqual( + [ + conf.get_index_string() + for c in list_compounds + for e in c.get_enumerations() + for conf in e.get_conformers() + ], + [ + "0:0:0", + "0:0:1", + "0:0:2", + "0:0:3", + "0:1:0", + "0:1:1", + "0:1:2", + "0:1:3", + "0:1:4", + "0:1:5", + "1:0:0", + "1:0:1", + "1:1:0", + "1:1:1", + "1:1:2", + "1:2:0", + "1:2:1", + "1:2:2", + "1:2:3", + ], + ) + self.assertListEqual( + [e.get_smile() for c in list_compounds for e in c.get_enumerations()], + ["abc", "def", "kk", "abc", "xyz"], + ) + + def test_merging_by_name_compound_enumeration_id(self): + conf = { + _SBE.INPUT_MERGE_COMPOUNDS: True, + _SBE.INPUT_MERGE_COMPOUNDS_BY: _SBE.INPUT_MERGE_BY_NAME, + _SBE.INPUT_MERGE_ENUMERATIONS: True, + _SBE.INPUT_MERGE_ENUMERATIONS_BY: _SBE.INPUT_MERGE_BY_ID, + } + conf = StepMerge(**conf) + merger = InputMerger(conf) + list_compounds = merger.merge(self.list_compounds) + + self.assertEqual(len(list_compounds), 2) + self.assertEqual(len(list_compounds[0].get_enumerations()), 3) + self.assertEqual(len(list_compounds[1].get_enumerations()), 3) + + self.assertListEqual( + [c.get_name() for c in list_compounds], + ["test_molecule", "test_molecule_new"], + ) + self.assertListEqual( + [ + conf.get_index_string() + for c in list_compounds + for e in c.get_enumerations() + for conf in e.get_conformers() + ], + [ + "0:0:0", + "0:0:1", + "0:0:2", + "0:0:3", + "0:0:4", + "0:1:0", + "0:1:1", + "0:1:2", + "0:2:0", + "0:2:1", + "1:0:0", + "1:0:1", + "1:1:0", + "1:1:1", + "1:1:2", + "1:2:0", + "1:2:1", + "1:2:2", + "1:2:3", + ], + ) + self.assertListEqual( + [e.get_smile() for c in list_compounds for e in c.get_enumerations()], + ["abc", "def", "abc", "kk", "abc", "xyz"], + ) diff --git a/tests/step_utils/test_input_preparator.py b/tests/step_utils/test_input_preparator.py new file mode 100644 index 0000000..18c243c --- /dev/null +++ b/tests/step_utils/test_input_preparator.py @@ -0,0 +1,101 @@ +import os +import unittest +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.core.step_utils.input_preparator import ( + InputPreparator, + StepInputParameters, + StepInputSource, +) +from icolos.core.containers.compound import Conformer, Enumeration, Compound +from 
icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.general.files_paths import attach_root_path +from tests.tests_paths import PATHS_EXAMPLEDATA + +_SBE = StepBaseEnum + + +class Test_InputPreparator(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/InputPreparator") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + # comp1 has 2 enumerations, one with 2 and one with 3 conformers + comp1 = Compound(name="test_molecule", compound_number=0) + comp1_enum1 = Enumeration(smile="abc", molecule=None, enumeration_id=1) + comp1_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True) + comp1_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True) + comp1_enum2 = Enumeration(smile="def", molecule=None, enumeration_id=2) + comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True) + comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True) + comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True) + comp1.add_enumeration(comp1_enum1, auto_update=False) + comp1.add_enumeration(comp1_enum2, auto_update=False) + + source1 = StepInputSource( + source="mol1:cccccc1", + source_type=_SBE.INPUT_SOURCE_TYPE_STRING, + source_field="new_string", + ) + source2 = StepInputSource( + source="prev_step", source_type=_SBE.INPUT_SOURCE_TYPE_STEP + ) + source3 = StepInputSource( + source="mock_step", + source_type=_SBE.INPUT_SOURCE_TYPE_STEP, + source_field="old_input_field", + target_field="new_input_field", + ) + source4 = StepInputSource( + source="mol2:cccc1", source_type=_SBE.INPUT_SOURCE_TYPE_STRING + ) + source5 = StepInputSource( + source=attach_root_path(PATHS_EXAMPLEDATA.PARACETAMOL_COSMO), + source_type=_SBE.INPUT_SOURCE_TYPE_PATH, + source_field="cosmo", + target_field="cosmo", + ) + source6 = StepInputSource( + source=attach_root_path(PATHS_EXAMPLEDATA.PARACETAMOL_COSMO), + source_type=_SBE.INPUT_SOURCE_TYPE_FILE, + source_field="cosmo_filepath", + target_field="cosmo_test_file", + ) + source7 = StepInputSource( + source=attach_root_path(PATHS_EXAMPLEDATA.PANTHER_NEGATIVE_IMAGE), + extension="mol2", + ) + self.params = StepInputParameters( + compounds=[source1, source4, source2], generic=[source7] + ) + blank_params = StepInputParameters(compounds=[], generic=[]) + mock_step = StepBase(step_id="mock_step", type=None, input=self.params) + prev_step = StepBase(step_id="prev_step", type=None, input=blank_params) + prev_step.data.compounds = [comp1] + + workflow = WorkFlow() + workflow.add_step(prev_step) + workflow.add_step(mock_step) + self.workflow = workflow + + @classmethod + def tearDownClass(cls): + pass + + def test_input_preparation(self): + preparator = InputPreparator(workflow=self.workflow, logger=None) + data, work_dir = preparator.generate_input( + step_input=self.params, step_type=_SBE.STEP_SHAEP + ) + self.assertEqual(len(data.compounds), 3) + self.assertEqual(len(data.generic.get_all_files()), 1) + with open(attach_root_path(PATHS_EXAMPLEDATA.PANTHER_NEGATIVE_IMAGE), "r") as f: + file = f.read() + self.assertEqual( + data.generic.get_file_by_name("panther_test_output.mol2").get_data(), file + ) + self.assertEqual(len(data.compounds[1]), 1) + self.assertEqual((len(data.compounds[2][1])), 3) diff --git a/tests/step_utils/test_run_variables_resolver.py b/tests/step_utils/test_run_variables_resolver.py new file mode 100644 index 0000000..4ceca8c --- /dev/null +++ 
b/tests/step_utils/test_run_variables_resolver.py
@@ -0,0 +1,164 @@
+import unittest
+
+from icolos.core.containers.compound import Conformer, Enumeration, Compound
+from icolos.core.step_utils.run_variables_resolver import RunVariablesResolver
+from icolos.utils.enums.step_enums import StepBaseEnum
+
+_SBE = StepBaseEnum
+
+
+class Test_RunVariablesResolver(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.resolver = RunVariablesResolver()
+
+    def setUp(self):
+        # comp1 has 2 enumerations, one with 2 and one with 3 conformers
+        comp1 = Compound(name="test_molecule", compound_number=0)
+        comp1_enum1 = Enumeration(
+            smile="abc", molecule=None, enumeration_id=1, compound_object=comp1
+        )
+        comp1_enum1.add_conformer(
+            Conformer(conformer_id=0, enumeration_object=comp1_enum1), auto_update=True
+        )
+        comp1_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp1_enum2 = Enumeration(smile="def", molecule=None, enumeration_id=2)
+        comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp1.add_enumeration(comp1_enum1, auto_update=False)
+        comp1.add_enumeration(comp1_enum2, auto_update=False)
+
+        # comp2 has 3 enumerations, one with 2, one with 3 and one with 4 conformers
+        comp2 = Compound(name="test_molecule_new", compound_number=0)
+        comp2_enum1 = Enumeration(smile="kk", molecule=None, enumeration_id=0)
+        comp2_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum2 = Enumeration(smile="abc", molecule=None, enumeration_id=1)
+        comp2_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum3 = Enumeration(smile="xyz", molecule=None, enumeration_id=2)
+        comp2_enum3.add_conformer(
+            Conformer(conformer_id=0, enumeration_object=comp2_enum3), auto_update=True
+        )
+        comp2_enum3.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum3.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum3.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2.add_enumeration(comp2_enum1, auto_update=False)
+        comp2.add_enumeration(comp2_enum2, auto_update=False)
+        comp2.add_enumeration(comp2_enum3, auto_update=False)
+
+        # comp3 has 2 enumerations, one with 2 and one with 3 conformers
+        # (same name as comp1, but a different compound number)
+        comp3 = Compound(name="test_molecule", compound_number=1)
+        comp3_enum1 = Enumeration(smile="abc", molecule=None, enumeration_id=0)
+        comp3_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp3_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp3_enum2 = Enumeration(
+            smile="def", molecule=None, enumeration_id=1, compound_object=comp3
+        )
+        comp3_enum2.add_conformer(Conformer(conformer_id=0), auto_update=False)
+        comp3_enum2.add_conformer(Conformer(conformer_id=0), auto_update=False)
+        comp3_enum2.add_conformer(Conformer(conformer_id=0), auto_update=False)
+        comp3.add_enumeration(comp3_enum1, auto_update=False)
+        comp3.add_enumeration(comp3_enum2, auto_update=False)
+        self.list_compounds = [comp1, comp2, comp3]
+
+    @classmethod
+    def tearDownClass(cls):
+        pass
+
+    def test_compound_replacements(self):
+        inp =
"/a/path/to/nowhere/[compound_id]/[compound_id]/compound_id/whatever/[compound_name]" + self.assertEqual( + self.resolver.resolve_compound_level(inp, self.list_compounds[0]), + "/a/path/to/nowhere/0/0/compound_id/whatever/test_molecule", + ) + self.assertEqual( + self.resolver.resolve_compound_level(inp, self.list_compounds[1]), + "/a/path/to/nowhere/0/0/compound_id/whatever/test_molecule_new", + ) + self.assertEqual( + self.resolver.resolve_compound_level(inp, self.list_compounds[2]), + "/a/path/to/nowhere/1/1/compound_id/whatever/test_molecule", + ) + + # test what happens, when no replacement is done + inp = "/a/string/withouttreplacement" + self.assertEqual( + self.resolver.resolve_compound_level(inp, self.list_compounds[0]), inp + ) + + def test_enumeration_replacements(self): + inp = "/a/path/to/nowhere/[compound_id]/[enumeration_id]/[enumeration_string]/whatever/[enumeration_id]" + self.assertEqual( + self.resolver.resolve_enumeration_level(inp, self.list_compounds[0][0]), + "/a/path/to/nowhere/[compound_id]/1/0:1/whatever/1", + ) + self.assertEqual( + self.resolver.resolve_enumeration_level(inp, self.list_compounds[0][1]), + "/a/path/to/nowhere/[compound_id]/2/:2/whatever/2", + ) + self.assertEqual( + self.resolver.resolve_enumeration_level(inp, self.list_compounds[2][1]), + "/a/path/to/nowhere/[compound_id]/1/1:1/whatever/1", + ) + + # test what happens, when no replacement is done + inp = "/a/string/withouttreplacement" + self.assertEqual( + self.resolver.resolve_enumeration_level(inp, self.list_compounds[0][0]), inp + ) + + def test_conformer_replacements(self): + inp = "/a/path/[conformer_string]to/nowhere/[compound_id]/[conformer_id]/[enumeration_string]/whatever/[conformer_id]" + self.assertEqual( + self.resolver.resolve_conformer_level(inp, self.list_compounds[0][0][0]), + "/a/path/0:1:0to/nowhere/[compound_id]/0/[enumeration_string]/whatever/0", + ) + self.assertEqual( + self.resolver.resolve_conformer_level(inp, self.list_compounds[0][0][1]), + "/a/path/0:1:1to/nowhere/[compound_id]/1/[enumeration_string]/whatever/1", + ) + self.assertEqual( + self.resolver.resolve_conformer_level(inp, self.list_compounds[2][0][1]), + "/a/path/:0:1to/nowhere/[compound_id]/1/[enumeration_string]/whatever/1", + ) + self.assertEqual( + self.resolver.resolve_conformer_level(inp, self.list_compounds[1][2][0]), + "/a/path/:2:0to/nowhere/[compound_id]/0/[enumeration_string]/whatever/0", + ) + + # test what happens, when no replacement is done + inp = "/a/string/withouttreplacement" + self.assertEqual( + self.resolver.resolve_conformer_level(inp, self.list_compounds[0][0][0]), + inp, + ) + + def test_resolve(self): + inp = "/a/path/[conformer_string]to/nowhere/[compound_id]/[conformer_id]/[enumeration_string]/whatever/[compound_name]" + self.assertEqual( + self.resolver.resolve(inp, self.list_compounds[0][0][0]), + "/a/path/0:1:0to/nowhere/0/0/0:1/whatever/test_molecule", + ) + self.assertEqual( + self.resolver.resolve(inp, self.list_compounds[0][0]), + "/a/path/[conformer_string]to/nowhere/0/[conformer_id]/0:1/whatever/test_molecule", + ) + self.assertEqual( + self.resolver.resolve(inp, self.list_compounds[0]), + "/a/path/[conformer_string]to/nowhere/0/[conformer_id]/[enumeration_string]/whatever/test_molecule", + ) + + # fails for cases where the linking conformer -> enumeration -> compound is not established + try: + self.resolver.resolve(inp, self.list_compounds[2][0][1]) + except Exception as e: + self.assertEqual( + e.__str__(), "'NoneType' object has no attribute 'get_compound_number'" + ) 
+ + # test what happens, when no replacement is done + inp = "/a/string/withouttreplacement" + self.assertEqual(self.resolver.resolve(inp, self.list_compounds[0][0][0]), inp) diff --git a/tests/step_utils/test_structconvert.py b/tests/step_utils/test_structconvert.py new file mode 100644 index 0000000..b11a601 --- /dev/null +++ b/tests/step_utils/test_structconvert.py @@ -0,0 +1,32 @@ +import os +import unittest +from icolos.core.step_utils.structconvert import StructConvert +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.general.files_paths import attach_root_path, remove_folder +from tests.tests_paths import PATHS_EXAMPLEDATA + +_SBE = StepBaseEnum +_SEE = SchrodingerExecutablesEnum() + + +class Test_Structconvert(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/structconvert") + remove_folder(cls._test_dir) + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + pass + + def test_sdf2pdb(self): + executor = StructConvert(prefix_execution=_SEE.SCHRODINGER_MODULE) + output_path = os.path.join(self._test_dir, "output_small_molecule.pdb") + executor.sdf2pdb( + sdf_file=PATHS_EXAMPLEDATA.SMALL_MOLECULE_SDF_PATH, pdb_file=output_path + ) + + stat_inf = os.stat(output_path) + self.assertEqual(stat_inf.st_size, 2209) diff --git a/tests/step_utils/test_writeout.py b/tests/step_utils/test_writeout.py new file mode 100644 index 0000000..e5bac42 --- /dev/null +++ b/tests/step_utils/test_writeout.py @@ -0,0 +1,335 @@ +import os +import unittest +from icolos.core.containers.generic import GenericContainer, GenericData +from icolos.core.containers.compound import Compound, Enumeration +from icolos.core.step_utils.input_preparator import StepData +from icolos.core.step_utils.step_writeout import WriteOutHandler +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.general.files_paths import attach_root_path, remove_folder +from tests.tests_paths import PATHS_EXAMPLEDATA, get_mol_as_Conformer +import shutil + +_SBE = StepBaseEnum + + +class Test_WriteOut(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/write-out") + remove_folder(cls._test_dir) + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + comp = Compound(compound_number=1, name="paracetamol") + enum_mol = get_mol_as_Conformer(PATHS_EXAMPLEDATA.PARACETAMOL_PATH)[ + 0 + ].get_molecule() + comp.add_enumeration(Enumeration(molecule=enum_mol), auto_update=True) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + comp[0].add_conformers(conformers, auto_update=True) + self.compound = comp + + comp2 = Compound(compound_number=2) + comp2.add_enumeration(Enumeration(molecule=enum_mol), auto_update=True) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + comp2[0].add_conformers(conformers, auto_update=True) + self.compound2 = comp2 + + @classmethod + def tearDownClass(cls): + pass + + def test_conformer_writeout_merged(self): + conf = { + _SBE.WRITEOUT_CONFIG: { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_SDF, + }, + } + } + writeout_handler = WriteOutHandler(**conf) + 
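+        # typical write-out flow (sketch of the pattern used throughout this file):
+        #     handler = WriteOutHandler(**conf)     # validate the config dict
+        #     handler.set_data(StepData(...))       # attach compounds/generic data
+        #     handler.config.destination.resource = "/path/to/out.sdf"
+        #     handler.write()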
writeout_handler.set_data(StepData(compounds=[self.compound, self.compound2])) + + writeout_handler.config.destination.resource = os.path.join( + self._test_dir, "both_compounds.sdf" + ) + writeout_handler.write() + stat_inf = os.stat(os.path.join(self._test_dir, "both_compounds.sdf")) + self.assertGreater(stat_inf.st_size, 39000) + + def test_conformer_writeout_split(self): + conf = { + _SBE.WRITEOUT_CONFIG: { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_SDF, + _SBE.WRITEOUT_DESTINATION_MERGE: False, + }, + } + } + writeout_handler = WriteOutHandler(**conf) + writeout_handler.set_data(StepData(compounds=[self.compound, self.compound2])) + + writeout_handler.config.destination.resource = os.path.join( + self._test_dir, "[compound_id]_split.sdf" + ) + writeout_handler.write() + stat_inf = os.stat(os.path.join(self._test_dir, "1_split.sdf")) + self.assertGreater(stat_inf.st_size, 19900) + stat_inf = os.stat(os.path.join(self._test_dir, "2_split.sdf")) + self.assertGreater(stat_inf.st_size, 19500) + + def test_extradata_writeout(self): + conf = { + _SBE.WRITEOUT_CONFIG: { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_EXTRADATA, + _SBE.WRITEOUT_COMP_KEY: "testdata", + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_TXT, + }, + } + } + self.compound[0][0].add_extra_data("testdata", ["this\n", "is\n", "a\ntest"]) + self.compound[0][1].add_extra_data( + "testdata", "YETANOTHERTEST\nthis\nis\na\ntest" + ) + self.compound[0]._conformers = [self.compound[0][0], self.compound[0][1]] + + writeout_handler = WriteOutHandler(**conf) + writeout_handler.set_data(StepData(compounds=[self.compound])) + + # generate two files + writeout_handler.config.destination.resource = os.path.join( + self._test_dir, "extra_writeout/[conformer_id].txt" + ) + writeout_handler.write() + stat_inf = os.stat(os.path.join(self._test_dir, "extra_writeout/0.txt")) + self.assertEqual(stat_inf.st_size, 15) + stat_inf = os.stat(os.path.join(self._test_dir, "extra_writeout/1.txt")) + self.assertEqual(stat_inf.st_size, 29) + + def test_tabular_writeout(self): + config = { + _SBE.WRITEOUT_CONFIG: { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_CSV, + }, + } + } + for idx, conf in enumerate(self.compound[0].get_conformers()): + conf.get_molecule().SetProp("Gsolv_whatever", str(idx)) + self.compound[0][3].get_molecule().SetProp("another_prop", "bbc") + writeout_handler = WriteOutHandler(**config) + writeout_handler.set_data(StepData(compounds=[self.compound])) + + # write-out without selecting any tags + writeout_handler.config.destination.resource = os.path.join( + self._test_dir, "tabular_notagsselected_[conformer_id].csv" + ) + writeout_handler.write() + stat_inf = os.stat(os.path.join(self._test_dir, "tabular_notagsselected_0.csv")) + self.assertGreater(stat_inf.st_size, 250) + + # write-out with selecting tags + writeout_handler.config.destination.resource = os.path.join( + self._test_dir, "tabular_tagsselected_[conformer_id].csv" + ) + writeout_handler.config.compounds.selected_tags = [ + 
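+            # "Gsolv_dmso" below is never set on these conformers; missing tags
+            # are presumably written out as empty fields rather than raising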
"Gsolv_whatever", + "Gsolv_dmso", + "another_prop", + ] + writeout_handler.write() + stat_inf = os.stat(os.path.join(self._test_dir, "tabular_tagsselected_0.csv")) + self.assertGreater(stat_inf.st_size, 300) + + def test_tabular_writeout_aggregate(self): + config = { + _SBE.WRITEOUT_CONFIG: { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_CSV, + }, + } + } + for idx, conf in enumerate(self.compound[0].get_conformers()): + conf.get_molecule().SetProp("Gsolv_whatever", str(idx)) + self.compound[0][3].get_molecule().SetProp("another_prop", "bbc") + writeout_handler = WriteOutHandler(**config) + writeout_handler.set_data(StepData(compounds=[self.compound])) + + # write-out without selecting tags and using compound-level aggregation + writeout_handler.config.destination.resource = os.path.join( + self._test_dir, "tabular_notagsselected_[conformer_id]_compagg.csv" + ) + writeout_handler.config.compounds.aggregation.mode = ( + _SBE.WRITEOUT_COMP_AGGREGATION_MODE_BESTPERCOMPOUND + ) + writeout_handler.config.compounds.selected_tags = ["Gsolv_dmso"] + writeout_handler.config.compounds.aggregation.key = "Gsolv_dmso" + writeout_handler.write() + stat_inf = os.stat( + os.path.join(self._test_dir, "tabular_notagsselected_6_compagg.csv") + ) + self.assertEqual(stat_inf.st_size, 56) + + # write-out without selecting tags and using compound-level aggregation (reverse) + writeout_handler.config.destination.resource = os.path.join( + self._test_dir, "tabular_notagsselected_[conformer_id]_compagg.csv" + ) + writeout_handler.config.compounds.aggregation.mode = ( + _SBE.WRITEOUT_COMP_AGGREGATION_MODE_BESTPERCOMPOUND + ) + writeout_handler.config.compounds.selected_tags = ["Gsolv_dmso"] + writeout_handler.config.compounds.aggregation.key = "Gsolv_dmso" + writeout_handler.config.compounds.aggregation.highest_is_best = False + writeout_handler.write() + stat_inf = os.stat( + os.path.join(self._test_dir, "tabular_notagsselected_7_compagg.csv") + ) + self.assertEqual(stat_inf.st_size, 56) + + def test_reinvent_writeout_empty(self): + config = { + _SBE.WRITEOUT_CONFIG: { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_REINVENT, + _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_JSON, + _SBE.WRITEOUT_DESTINATION_RESOURCE: os.path.join( + self._test_dir, "reinvent_empty.json" + ), + }, + } + } + for idx, conf in enumerate(self.compound[0].get_conformers()): + conf.get_molecule().SetProp("Gsolv_whatever", str(idx)) + self.compound[0].clear_conformers() + writeout_handler = WriteOutHandler(**config) + writeout_handler.set_data(StepData(compounds=[self.compound])) + + writeout_handler.config.compounds.selected_tags = [ + "conformer_energy", + "G_octanol", + ] + + # write-out to console (REINVENT style) + writeout_handler.write() + + # write-out to file + out_path = os.path.join(self._test_dir, "reinvent_empty.json") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 300) + + def test_reinvent_writeout_merged(self): + config = { + _SBE.WRITEOUT_CONFIG: { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_REINVENT, + 
_SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_JSON,
+                    _SBE.WRITEOUT_DESTINATION_RESOURCE: os.path.join(
+                        self._test_dir, "reinvent.json"
+                    ),
+                },
+            }
+        }
+        for idx, conf in enumerate(self.compound[0].get_conformers()):
+            conf.get_molecule().SetProp("Gsolv_whatever", str(idx))
+        self.compound[0][3].get_molecule().SetProp("another_prop", "bbc")
+        writeout_handler = WriteOutHandler(**config)
+        writeout_handler.set_data(StepData(compounds=[self.compound]))
+
+        writeout_handler.config.compounds.selected_tags = [
+            "conformer_energy",
+            "G_octanol",
+        ]
+
+        # write-out to console (REINVENT style)
+        writeout_handler.write()
+
+        # write-out to file
+        out_path = os.path.join(self._test_dir, "reinvent.json")
+        stat_inf = os.stat(out_path)
+        self.assertGreater(stat_inf.st_size, 300)
+
+    def test_generic_writeout(self):
+        conf = {
+            _SBE.WRITEOUT_CONFIG: {
+                _SBE.WRITEOUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "txt"},
+                _SBE.WRITEOUT_DESTINATION: {
+                    _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE,
+                    _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_TXT,
+                },
+            }
+        }
+        gc = GenericContainer()
+        gc.add_file(
+            GenericData(
+                file_name="anothertest.txt",
+                file_data="YETANOTHERTEST\nthis\nis\na\ntest",
+            )
+        )
+        writeout_handler = WriteOutHandler(**conf)
+        writeout_handler.set_data(StepData(generic=gc))
+
+        # write out the file; an index suffix gets appended to the name
+        out_path = os.path.join(self._test_dir, "anothertest.txt")
+        writeout_handler.config.destination.resource = out_path
+        writeout_handler.write()
+        out_path = os.path.join(self._test_dir, "anothertest_0.txt")
+        stat_inf = os.stat(out_path)
+        self.assertEqual(stat_inf.st_size, 29)
+
+    def test_generic_writeout_path(self):
+        conf = {
+            _SBE.WRITEOUT_CONFIG: {
+                _SBE.WRITEOUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "xtc"},
+                _SBE.WRITEOUT_DESTINATION: {
+                    _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE,
+                    _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_TXT,
+                },
+            }
+        }
+
+        writeout_handler = WriteOutHandler(**conf)
+        # simulate the data being a path to a large file on disk
+        gc = GenericContainer()
+        gc.add_file(
+            GenericData(
+                file_name="md_0_1.xtc", file_data=PATHS_EXAMPLEDATA.GROMACS_PDB_FILE
+            )
+        )
+        writeout_handler.set_data(StepData(generic=gc))
+        out_path = os.path.join(self._test_dir, "md_0_1.xtc")
+        writeout_handler.config.destination.resource = out_path
+        out_path = os.path.join(self._test_dir, "md_0_1_0.xtc")
+        writeout_handler.write()
+
+        # restore the file, since by default it is removed from the source location
+        if not os.path.isfile(PATHS_EXAMPLEDATA.GROMACS_PDB_FILE):
+            shutil.copyfile(out_path, PATHS_EXAMPLEDATA.GROMACS_PDB_FILE)
+        stat_inf = os.stat(out_path)
+        self.assertEqual(stat_inf.st_size, 53635)
diff --git a/tests/structure_prediction/__init__.py b/tests/structure_prediction/__init__.py
new file mode 100644
index 0000000..a8c2721
--- /dev/null
+++ b/tests/structure_prediction/__init__.py
@@ -0,0 +1,5 @@
+from tests.structure_prediction.test_peptide_embedder import *
+from tests.structure_prediction.test_pdb_fixer import *
+
+# from tests.structure_prediction.test_dssp import *
+# TODO: work out why the dssp unit test hangs sometimes
diff --git a/tests/structure_prediction/test_dssp.py b/tests/structure_prediction/test_dssp.py
new file mode 100644
index 0000000..e59afca
--- /dev/null
+++ b/tests/structure_prediction/test_dssp.py
@@ -0,0 +1,52 @@
+from icolos.core.containers.generic import GenericData
+import unittest
+from icolos.core.workflow_steps.structure_prediction.dssp import StepDSSP
+from
icolos.utils.general.files_paths import attach_root_path +import os +from tests.tests_paths import PATHS_EXAMPLEDATA +from icolos.utils.enums.step_enums import StepBaseEnum + +_SBE = StepBaseEnum + + +class TestDSSP(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/structure_prediction") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.DSSP_PDB_1), "r") as f: + self.pdb1 = f.read() + + with open(attach_root_path(PATHS_EXAMPLEDATA.DSSP_PDB_2), "r") as f: + self.pdb2 = f.read() + + with open(attach_root_path(PATHS_EXAMPLEDATA.DSSP_PDB_3), "r") as f: + self.pdb3 = f.read() + + def test_dssp(self): + step_conf = { + _SBE.STEPID: "01_DSSP", + _SBE.STEP_TYPE: _SBE.STEP_DSSP, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load DSSP"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"--output-format": "dssp"} + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + } + + step_dssp = StepDSSP(**step_conf) + pdb1 = GenericData(file_name="test_1.pdb", file_data=self.pdb1) + pdb2 = GenericData(file_name="test_2.pdb", file_data=self.pdb2) + pdb3 = GenericData(file_name="test_3.pdb", file_data=self.pdb3) + step_dssp.data.generic.add_files([pdb1, pdb2, pdb3]) + step_dssp.execute() + + out_path = os.path.join(self._test_dir, "dssp_output_test_1.txt") + step_dssp.write_generic_by_name(self._test_dir, "dssp_output_test_1.txt") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 1234) diff --git a/tests/structure_prediction/test_pdb_fixer.py b/tests/structure_prediction/test_pdb_fixer.py new file mode 100644 index 0000000..9c88b3a --- /dev/null +++ b/tests/structure_prediction/test_pdb_fixer.py @@ -0,0 +1,63 @@ +from icolos.core.containers.generic import GenericData +import unittest +from icolos.core.workflow_steps.structure_prediction.pdb_fixer import StepPdbFixer +from icolos.utils.general.files_paths import attach_root_path +import os +from tests.tests_paths import PATHS_EXAMPLEDATA +from icolos.utils.enums.step_enums import StepBaseEnum + +_SBE = StepBaseEnum + + +class TestPdbFixer(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/structure_prediction") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.PANTHER_RECEPTOR_PDB), "r") as f: + self.pdb = f.read() + + def test_pdb_fixer_default(self): + step_conf = { + _SBE.STEPID: "01_PDB_FIXER", + _SBE.STEP_TYPE: _SBE.STEP_PDB_FIXER, + _SBE.EXEC: {}, + _SBE.SETTINGS: {_SBE.SETTINGS_ARGUMENTS: {}, _SBE.SETTINGS_ADDITIONAL: {}}, + } + step_pdb_fixer = StepPdbFixer(**step_conf) + test_pdb = GenericData(file_name="test.pdb", file_data=self.pdb) + step_pdb_fixer.data.generic.add_file(test_pdb) + step_pdb_fixer.execute() + + out_path = os.path.join(self._test_dir, "test.pdb") + step_pdb_fixer.write_generic_by_extension(path=self._test_dir, ext="pdb") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 738000) + + def test_pdb_fixer(self): + step_conf = { + _SBE.STEPID: "01_PDB_FIXER", + _SBE.STEP_TYPE: _SBE.STEP_PDB_FIXER, + _SBE.EXEC: {}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "--keep-heterogens": "water", + "--ph": "4.0", + }, + } + }, + } + step_pdb_fixer = StepPdbFixer(**step_conf) + test_pdb = GenericData(file_name="test_2.pdb", 
file_data=self.pdb) + step_pdb_fixer.data.generic.add_file(test_pdb) + step_pdb_fixer.execute() + + out_path = os.path.join(self._test_dir, "test_2.pdb") + step_pdb_fixer.write_generic_by_extension(path=self._test_dir, ext="pdb") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 710000) diff --git a/tests/structure_prediction/test_peptide_embedder.py b/tests/structure_prediction/test_peptide_embedder.py new file mode 100644 index 0000000..de47707 --- /dev/null +++ b/tests/structure_prediction/test_peptide_embedder.py @@ -0,0 +1,51 @@ +from icolos.core.containers.generic import GenericData +import unittest +from icolos.core.workflow_steps.structure_prediction.peptide_embedder import ( + StepPeptideEmbedder, +) +from icolos.utils.general.files_paths import attach_root_path +import os +from tests.tests_paths import PATHS_EXAMPLEDATA +from icolos.utils.enums.step_enums import StepBaseEnum + +_SBE = StepBaseEnum + + +class TestPeptideEmbedder(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/structure_prediction") + + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.TEST_FASTA_FILE), "r") as f: + self.fasta = f.read() + + def test_peptide_embedder(self): + step_conf = { + _SBE.STEPID: "01_peptide_embedder", + _SBE.STEP_TYPE: _SBE.STEP_PEPTIDE_EMBEDDER, + _SBE.EXEC: {}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-prime_opt": "OPLS_VERSION=OPLS3e", + "-HOST": "cpu-only", + }, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + } + + step_embedder = StepPeptideEmbedder(**step_conf) + fasta_obj = GenericData(file_name="test_seq.fasta", file_data=self.fasta) + step_embedder.data.generic.add_file(fasta_obj) + step_embedder.execute() + + out_path = os.path.join(self._test_dir, "sequence_0.pdb") + step_embedder.write_generic_by_extension(path=self._test_dir, ext="pdb") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 17504) diff --git a/tests/tests_paths.py b/tests/tests_paths.py new file mode 100644 index 0000000..5c25ef1 --- /dev/null +++ b/tests/tests_paths.py @@ -0,0 +1,396 @@ +from icolos.utils.enums.program_parameters import PantherEnum +from icolos.core.containers.generic import GenericData +import json +import os +from typing import List, Dict +from icolos.core.containers.compound import Compound, Enumeration, Conformer +from icolos.utils.general.files_paths import attach_root_path +from icolos.utils.smiles import to_smiles +from rdkit import Chem +from icolos.utils.enums.write_out_enums import WriteOutEnum +from shutil import copytree, rmtree + +_PE = PantherEnum() +_WE = WriteOutEnum() + +# load the instantiated "config.json", holding the license key for OpenEye for example +try: + with open( + attach_root_path("icolos/config/unit_tests_config/config.json"), "r" + ) as f: + MAIN_CONFIG = json.load(f) +except: + MAIN_CONFIG = {} + + +def expand_path(path: str) -> str: + return os.path.join(MAIN_CONFIG["ICOLOS_TEST_DATA"], path) + + +def create_test_dir(source: str, dest: str) -> None: + try: + if os.path.isdir(dest): + # remove the existing directory structure before calling copytree or it will complain + rmtree(dest) + copytree(source, dest) + except Exception as e: + os.makedirs(dest) + + +def export_unit_test_env_vars(): + # make sure "PATH" is executed last to expand upwards variables + for key in MAIN_CONFIG.keys(): + if key != "PATH": + 
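+            # e.g. (hypothetical values) {"SCHRODINGER": "/opt/schrodinger",
+            # "PATH": "$SCHRODINGER:$PATH"}: exporting SCHRODINGER first lets
+            # the deferred "PATH" expansion below pick it up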
if isinstance(MAIN_CONFIG[key], str): + os.environ[str(key)] = os.path.expandvars(MAIN_CONFIG[key]) + # iterate through nested dicts + elif isinstance(MAIN_CONFIG[key], dict): + for k in MAIN_CONFIG[key].keys(): + os.environ[str(k)] = os.path.expandvars(MAIN_CONFIG[key][k]) + if "PATH" in MAIN_CONFIG.keys(): + os.environ["PATH"] = os.path.expandvars(MAIN_CONFIG["PATH"]) + + +class PATHS_1UYD: + + GRID_PATH = expand_path("Glide/1UYD_grid_no_constraints.zip") + GRID_CONSTRAINTS_PATH = expand_path("Glide/1UYD_grid_constraints.zip") + PDBQT_PATH = expand_path("AutoDockVina/1UYD_fixed.pdbqt") + PDB_PATH = expand_path("molecules/1UYD/1UYD_apo.pdb") + LIGANDS = expand_path("molecules/1UYD/1UYD_ligands.sdf") + NATIVE_LIGAND_SDF = expand_path("molecules/1UYD/PU8_native_ligand.sdf") + NATIVE_LIGAND_PDB = expand_path("molecules/1UYD/PU8_native_ligand.pdb") + LIG4_POSES = expand_path("fep_plus/1uyd_lig4.sdf") + XRAY_STRUCTURES = expand_path("fep_plus/xray_structures") + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class PATHS_EXAMPLEDATA: + + ASPIRIN_SMI_PATH = expand_path("molecules/aspirin.smi") + PARACETAMOL_SMI_PATH = expand_path("molecules/paracetamol.smi") + ASPIRIN_PATH = expand_path("molecules/aspirin.sdf") + PARACETAMOL_PATH = expand_path("molecules/paracetamol.sdf") + SMALL_MOLECULES_SMI_PATH = expand_path("molecules/small_molecules.smi") + SMALL_MOLECULES_CSV_PATH = expand_path("molecules/small_molecules.csv") + SMALL_MOLECULES_CSV_PATH_DELIMITER_SEMICOLON = expand_path( + "molecules/small_molecules_semicolon.csv" + ) + MEDIUM_MOLECULES_SMI_PATH = expand_path("molecules/medium_molecules.smi") + SMALL_MOLECULES_SDF_PATH = expand_path("molecules/small_molecules.sdf") + SMALL_MOLECULE_SDF_PATH = expand_path("molecules/small_molecule.sdf") + SMALL_MOLECULES_JSON_PATH = expand_path("reinvent/small_input.json") + MEDIUM_MOLECULES_SDF_PATH = expand_path("molecules/medium_molecules.sdf") + PARACETAMOL_MULTIPLE_CONF = expand_path("molecules/paracetamol_multiple.sdf") + PARACETAMOL_COSMO = expand_path("Turbomole/paracetamol.cosmo") + PARACETAMOL_COSMO_OUTPUT = expand_path("cosmo/cosmotherm.out") + EPSA_MODEL_PATH = expand_path("models/ePSA_example.pkl") + EPSA_BOLTZMANN_WEIGHTING_EXAMPLE_MOLECULE = expand_path( + "models/ePSA_Boltzmann_weighting.sdf" + ) + GLIDE_EXAMPLE_IN = expand_path("Glide/example.in") + EPSA_EXAMPLE_MOLECULE = expand_path("models/ePSA_example_mol.sdf") + PRIME_RECEPTOR_COX2 = expand_path("prime/cox2_receptor.mae") + PRIME_COX2_GRID = expand_path("molecules/1CX2/1cx2_GridGen.zip") + PRIME_DOCKED_LIGAND_SDF = expand_path("prime/docked_ligand.sdf") + CLUSTERING_11CONFS = expand_path("clustering/paracetamol_11_conformers.sdf") + PANTHER_CONFIG = expand_path("panther/default_panther.in") + PANTHER_RECEPTOR_PDB = expand_path("panther/COX2_A.pdb") + PANTHER_NEGATIVE_IMAGE = expand_path("panther/panther_test_output.mol2") + SHAEP_LIGAND_DOCKED_POSE = expand_path("panther/cox2_ligand_bound.sdf") + + GROMACS_STRUCTURE_FILE = expand_path("gromacs/test_structure.gro") + GROMACS_PDB_FILE = expand_path("gromacs/test_structure.pdb") + GROMACS_NVT_MDP = expand_path("gromacs/nvt_equil.mdp") + GROMACS_NPT_MDP = expand_path("gromacs/npt_equil.mdp") + GROMACS_MINIM_MDP = expand_path("gromacs/minim.mdp") + GROMACS_IONS_MDP = expand_path("gromacs/ions.mdp") + GROMACS_MD_MDP = 
expand_path("gromacs/md.mdp") + GROMACS_TPR_FILE = expand_path("gromacs/test.tpr") + GROMACS_TOPOL_FILE = expand_path("gromacs/topol.top") + GROMACS_GROMPP_INPUT_STRUCTURE = expand_path("gromacs/grompp_input.gro") + GROMACS_XTC = expand_path("gromacs/md_0_1.xtc") + GROMACS_TPR_TRJCONV = expand_path("gromacs/md_0_1.tpr") + GROMACS_HOLO_STRUCTURE = expand_path("gromacs/protein/1BVG.pdb") + GROMACS_HOLO_STRUCTURE_GRO = expand_path("gromacs/protein/1BVG.gro") + GROMACS_DMP_LIGAND_TRJ = expand_path("gromacs/protein/DMP.xtc") + GROMACS_DMP_LIGAND_SDF = expand_path("gromacs/protein/DMP.sdf") + GROMACS_PROTEIN_FILE_BASE = expand_path("gromacs/protein") + GROMACS_GROMPP_TOPOL = expand_path("gromacs/grompp_topol.top") + GROMACS_DSSP_TPR = expand_path("gromacs/.tpr") + GROMACS_DSSP_XTC = expand_path("gromacs/.xtc") + GROMACS_TS_CLUSTERS = expand_path("gromacs/clusters_ts_example.xvg") + MMPBSA_TPR = expand_path("gromacs/protein/1BVG.tpr") + MMPBSA_XTC = expand_path("gromacs/protein/1BVG.xtc") + MMPBSA_TOP = expand_path("gromacs/protein/1BVG.top") + MMPBSA_CUSTOM_INPUT = expand_path("gromacs/test_input_mmpbsa.in") + MMPBSA_POSRE = expand_path("gromacs/protein/posre.itp") + MMPBSA_LIG_POSRE = expand_path("gromacs/protein/posre_DMP:100.itp") + MMPBSA_LIG_ITP = expand_path("gromacs/protein/DMP:100.itp") + + FEP_PLUS_DOCKING_PV = expand_path("fep_plus/set_pv.maegz") + FEP_PLUS_EXAMPLE_FMP = expand_path("fep_plus/out.fmp") + FEP_PLUS_MAP_LOG = expand_path("fep_plus/fep_mapper.log") + FEP_PLUS_MAP_LOG_MIN = expand_path("fep_plus/fep_mapper_min.log") + FEP_PLUS_MAP_LOG_SINGLE_EDGE = expand_path("fep_plus/fep_mapper_single_edge.log") + + FEP_PLUS_LIGANDS = expand_path("fep_plus/ligprep_confs.sdf") + FEP_PLUS_EXAMPLE_FMP_OUT = expand_path("fep_plus/test_out.fmp") + FEP_PLUS_MULTISIM = expand_path("fep_plus/multisim.log") + FEP_PLUS_PROTEIN = expand_path("fep_plus/.pdb") + FEP_PLUS_OTHER_PROTEIN = expand_path("fep_plus/_apo.pdb") + FEP_PLUS_MULTISIM_LONG = expand_path("fep_plus/multisim.log") + + MODEL_BUILDER_EXAMPLE_JSON = expand_path( + "model_building/OptunaAZ_example_config.json" + ) + MODEL_BUILDER_TEST_INPUT_SDF = expand_path("model_building/test_input_data.sdf") + PRIME_POSEVIEWER = expand_path("molecules/1CX2/1cx2_poseviewer_pv.maegz") + COX2_ACTIVES_DOCKED = expand_path("molecules/1CX2/docked_actives.sdf") + LIGANDS_1UYD = expand_path("prime/glide_docked.sdf") + RECEPTOR_1UYD = expand_path("molecules/1UYD/1UYDreceptor.pdb") + + CAVITY_TRJ_FOLDER = expand_path("cavity_explorer/parch_align_trj") + CAVITY_DTR_FILE = expand_path("cavity_explorer/parch_align_trj/clickme.dtr") + CAVITY_CMS_FILE = expand_path("cavity_explorer/parch_align_trj/out.cms") + MDPOCKET_XTC_FILE = expand_path("cavity_explorer/structure_out_0.xtc") + MDPOCKET_PDB_FILE = expand_path("cavity_explorer/structure_0_wet.pdb") + MDPOCKET_PDB_FILE_DRY = expand_path("cavity_explorer/structure_0.pdb") + MD_POCKET_DESMOND_TOP = expand_path("cavity_explorer/top.pdb") + + DESMOND_SETUP_PDB = expand_path("desmond/1cx2.pdb") + DESMOND_PRODUCTION_CMS = expand_path("desmond/setup.cms") + TEST_FASTA_FILE = expand_path("structure_prediction/1acw.fasta") + + LIGAND_HYBRID_TEST_DIR = expand_path("pmx/lig_hybrid_work_dir") + PREPARE_SIMULATIONS_TEST_DIR = expand_path("pmx/prepare_simulations_work_dir") + ATOM_MAPPING_TEST_DIR = expand_path("pmx/atom_mapping_work_dir") + ASSEMBLE_SYSTEMS_TEST_DIR = expand_path("pmx/assemble_systems_work_dir") + BOX_WATER_IONS_TEST_DIR = expand_path("pmx/box_water_ions_work_dir") + PREPARE_TRANSITIONS_TEST_DIR = 
expand_path("pmx/prepare_transitions_work_dir") + RUN_ANALYSIS_TEST_DIR = expand_path("pmx/analysis_test_dir") + + RUN_SIMULATIONS_TEST_DIR = expand_path("pmx/run_simulations_work_dir") + PMX_FEP_MAP_LOG_PREPARE_TRANSITIONS = expand_path( + "pmx/prepare_transitions_work_dir/fep_mapper.log" + ) + PMX_LIG1_INPUT_PDB = expand_path("pmx/input/lig_18625-1.pdb") + PMX_ABFE_INPUT_COMPLEX = expand_path("pmx/abfe/1BVG.pdb") + PMX_ABFE_INPUT_LIGAND = expand_path("pmx/abfe/az_ligand.pdb") + PMX_LIG2_INPUT_PDB = expand_path("pmx/input/lig_18626-1.pdb") + PMX_LIG1_INPUT_ITP = expand_path("pmx/input/lig_18625-1.itp") + PMX_LIG2_INPUT_ITP = expand_path("pmx/input/lig_18626-1.itp") + PMX_MAPPED_PAIRS1_DAT = expand_path("pmx/input/pairs1.dat") + PMX_MAPPED_PAIRS2_DAT = expand_path( + "pmx/input/pairs2.dat" + ) # seems to be identical to 1, but in all cases + PMX_LIG1_INPUT_MAPPED_PDB = expand_path("pmx/input/out_atommap_lig1.pdb") + PMX_MDP_FILES = expand_path("pmx/mdppath") + PMX_LIG2_INPUT_MAPPED_PDB = expand_path("pmx/input/out_atommap_lig2.pdb") + DSSP_PDB_1 = expand_path("structure_prediction/1e0n.pdb") + DSSP_PDB_2 = expand_path("structure_prediction/1jbf.pdb") + DSSP_PDB_3 = expand_path("structure_prediction/6nox.pdb") + + # try to find the internal value and return + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +def get_mol_as_Compound(abs_path: str, compound_number: int = 0) -> Compound: + mol_supplier = Chem.SDMolSupplier(abs_path, removeHs=False) + for mol in mol_supplier: + enum = Enumeration( + smile=to_smiles(mol), original_smile=to_smiles(mol), molecule=mol + ) + comp = Compound( + name=os.path.basename(abs_path), compound_number=compound_number + ) + comp.add_enumeration(enum, auto_update=True) + return comp + + +def get_1UYD_ligands_as_Compounds(abs_path: str) -> List[Compound]: + comp_list = [] + mol_supplier = Chem.SDMolSupplier(abs_path, removeHs=False) + for cur_id, mol in enumerate(mol_supplier): + enum = Enumeration( + smile=to_smiles(mol), original_smile=to_smiles(mol), molecule=mol + ) + comp = Compound(name=mol.GetProp("_Name"), compound_number=cur_id) + comp.add_enumeration(enum, auto_update=True) + comp_list.append(comp) + return comp_list + + +def construct_full_compound_object(source) -> List[Compound]: + def _get_existing_enumeration(comp_id, enum_id): + comp = _get_existing_compound(comp_id) + for enum in comp.get_enumerations(): + if enum.get_enumeration_id() == int(enum_id): + return enum + raise ValueError + + def _get_existing_compound(idx): + for comp in list_compounds: + if int(idx) == comp.get_compound_number(): + return comp + raise ValueError + + list_compounds = [] + for mol in Chem.SDMolSupplier(source, removeHs=False): + new_compound = False + new_enumeration = False + mol_name = mol.GetProp(_WE.RDKIT_NAME) + # assuming the mol name follows Icolos conventions + try: + id_parts = mol_name.split(":") + comp_id = id_parts[0] + enum_id = id_parts[1] + + except: + comp_id = mol_name + enum_id = 0 + try: + # try to find an existing compound with the correct name + compound = _get_existing_compound(idx=comp_id) + except ValueError: + # the compound does not yet exist, create the object + new_compound = True + compound = Compound(name=comp_id, compound_number=comp_id) + try: + # check whether the enumeration exists + enumeration = 
+def get_ligands_as_compounds_with_conformers(
+    abs_path: str, poseviewer=None
+) -> List[Compound]:
+    comp_list = []
+    mol_supplier = Chem.SDMolSupplier(abs_path, removeHs=False)
+    for cur_id, mol in enumerate(mol_supplier):
+        enum = Enumeration(
+            smile=to_smiles(mol), original_smile=to_smiles(mol), molecule=mol
+        )
+        conf = Conformer(conformer=mol)
+        if poseviewer is not None:
+            conf.add_extra_data("structures_pv.maegz", data=poseviewer)
+        enum.add_conformer(conf)
+        comp = Compound(name=mol.GetProp("_Name"), compound_number=cur_id)
+        comp.add_enumeration(enum, auto_update=True)
+        comp_list.append(comp)
+    return comp_list
+
+
+def get_docked_ligands_as_conformers(abs_path: str, poseviewer=None) -> List[Compound]:
+    mol_supplier = Chem.SDMolSupplier(abs_path, removeHs=False)
+    comp = Compound(name="test_poses", compound_number=1)
+    enum = Enumeration()
+    for cur_id, mol in enumerate(mol_supplier):
+        conf = Conformer(conformer=mol, conformer_id=cur_id)
+
+        # attach the poseviewer file only to the first conformer
+        if cur_id == 0 and poseviewer is not None:
+            conf.add_extra_data(key="structures_pv.maegz", data=poseviewer)
+
+        enum.add_conformer(conf)
+
+    comp.add_enumeration(enum)
+    return [comp]
+
+
+def get_mol_as_Conformer(abs_path: str) -> List[Conformer]:
+    list_return = []
+    mol_supplier = Chem.SDMolSupplier(abs_path, removeHs=False)
+    for mol in mol_supplier:
+        list_return.append(Conformer(conformer=mol))
+    return list_return
+def get_test_Compounds_without_molecules(
+    compound_numbers: List[int] = [0],
+) -> Dict[str, Compound]:
+    """These compounds have neither a molecule in the enumeration nor any Conformer, i.e. no 3D structure."""
+    aspirin = Compound(name="Aspirin", compound_number=compound_numbers[0])
+    aspirin.add_enumeration(
+        Enumeration(
+            compound_object=aspirin,
+            smile="O=C(C)Oc1ccccc1C(=O)O",
+            original_smile="O=C(C)Oc1ccccc1C(=O)O",
+        )
+    )
+    return {"Aspirin": aspirin}
+
+
+def load_SDF_docked(abs_path: str) -> List[Compound]:
+    compounds = []
+    mol_supplier = Chem.SDMolSupplier(abs_path, removeHs=False)
+    for mol_id, mol in enumerate(mol_supplier):
+        comp = Compound(compound_number=mol_id)
+        enum = Enumeration(
+            smile=str(mol.GetProp("smiles")),
+            original_smile=str(mol.GetProp("original_smiles")),
+        )
+        conf = Conformer(conformer=mol)
+        enum.add_conformer(conf, auto_update=True)
+        comp.add_enumeration(enum, auto_update=True)
+        compounds.append(comp)
+    return compounds
+
+
+def directory_to_generic(path: str) -> List[GenericData]:
+    """Converts all files under the given path to GenericData objects and returns them as a list."""
+    generic_files = []
+    for root, _, files in os.walk(path):
+        for file_name in files:
+            full_path = os.path.join(root, file_name)
+            try:
+                with open(full_path, "r") as read_file:
+                    data = read_file.read()
+            except UnicodeDecodeError:
+                # non-text file: fall back to a binary read
+                with open(full_path, "rb") as read_file:
+                    data = read_file.read()
+            generic_files.append(GenericData(file_name=file_name, file_data=data))
+    return generic_files
diff --git a/unit_tests.py b/unit_tests.py
new file mode 100644
index 0000000..6022401
--- /dev/null
+++ b/unit_tests.py
@@ -0,0 +1,29 @@
+import unittest
+
+from tests.CREST import *
+from tests.OMEGA import *
+from tests.XTB import *
+from tests.Turbomole import *
+from tests.cosmo import *
+from tests.clustering import *
+from tests.rms_filter import *
+from tests.boltzmann_weighting import *
+from tests.composite_agents import *
+from tests.containers import *
+from tests.io import *
+from tests.feature_counter import *
+from tests.prediction import *
+from tests.step_utils import *
+from tests.schrodinger import *
+from tests.autodockvina import *
+from tests.panther import *
+from tests.shaep import *
+from tests.gromacs import *
+from tests.cavity_explorer import *
+from tests.structure_prediction import *
+from tests.rmsd import *
+from tests.flow_control import *
+from tests.pmx import *
+
+if __name__ == "__main__":
+    unittest.main()
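
unit_tests.py pulls every test module into one namespace via star imports so that a single `unittest.main()` call discovers all TestCase classes. Individual modules can also be run with the standard-library loader, without the aggregate; a sketch, assuming the repository root is on sys.path:

import unittest

# load just one module's tests instead of the star-import aggregate
suite = unittest.defaultTestLoader.loadTestsFromName(
    "tests.structure_prediction.test_peptide_embedder"
)
unittest.TextTestRunner(verbosity=2).run(suite)
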