From 046469f6dc925cf4696dca6d3c0345776265d018 Mon Sep 17 00:00:00 2001 From: kdmh016 Date: Fri, 28 Jan 2022 16:42:12 +0100 Subject: [PATCH] Version 1.4.2, reinitialized. --- .gitignore | 16 + CHANGELOG.md | 68 + DOCKERFILE | 7 + LICENCE | 169 ++ README.md | 75 + environment_min.yml | 24 + examples/hpc_script/NIBR_reinvent.sh | 11 + examples/hpc_script/SLURM_script.sh | 11 + examples/workflow/desmond_md.json | 72 + .../docking/active_learning_docking.json | 68 + examples/workflow/docking/adv_docking.json | 120 ++ .../docking/adv_target_preparation.json | 44 + examples/workflow/docking/glide_docking.json | 122 ++ examples/workflow/fep_plus/fep_plus_aws.json | 171 ++ .../gromacs/gromacs_ensemble_mmgbsa.json | 619 +++++++ .../workflow/gromacs/gromacs_fpocket.json | 656 ++++++++ examples/workflow/gromacs/gromacs_md.json | 488 ++++++ examples/workflow/gromacs/gromacs_mmgbsa.json | 580 +++++++ examples/workflow/input_file_types.json | 91 ++ examples/workflow/pmx/pmx_rbfe.json | 376 +++++ examples/workflow/qm/ePSA_permeability.json | 334 ++++ examples/workflow/qm/full_rescoss.json | 464 ++++++ .../workflow/reinvent/feature_counter.json | 71 + .../reinvent/nibr_local_reinvent.json | 159 ++ .../rescoring/negative_image_panther.json | 58 + examples/workflow/rescoring/nibr_local.json | 161 ++ .../workflow/rescoring/rmsd_rescoring.json | 223 +++ executor.py | 95 ++ external_documentation/REINVENT_input.json | 4 + external_documentation/REINVENT_result.json | 9 + external_documentation/fep_mapper.txt | 102 ++ external_documentation/fep_plus.txt | 109 ++ external_documentation/prime_arguments.txt | 180 +++ icolos/__init__.py | 0 icolos/config/amber/default_mmpbsa.in | 6 + icolos/config/cosmo/default_cosmo.config | 28 + icolos/config/desmond/config.msj | 23 + icolos/config/desmond/prod.cfg | 69 + icolos/config/desmond/production.msj | 102 ++ icolos/config/logging/debug.json | 75 + icolos/config/logging/default.json | 75 + icolos/config/logging/tutorial.json | 75 + icolos/config/logging/verbose.json | 75 + icolos/config/panther/default_panther.in | 74 + .../unit_tests_config/cosmo/cosmo.config | 28 + .../cosmo/example.cosmo.config | 28 + .../unit_tests_config/example.config.json | 30 + icolos/config_containers/__init__.py | 0 icolos/config_containers/container.py | 33 + .../config_containers/workflow_container.py | 13 + icolos/core/__init__.py | 0 icolos/core/composite_agents/__init__.py | 0 icolos/core/composite_agents/base_agent.py | 108 ++ icolos/core/composite_agents/scheduler.py | 54 + icolos/core/composite_agents/workflow.py | 143 ++ icolos/core/containers/__init__.py | 0 icolos/core/containers/compound.py | 549 +++++++ icolos/core/containers/generic.py | 210 +++ icolos/core/containers/perturbation_map.py | 294 ++++ icolos/core/flow_control/__init__.py | 0 icolos/core/flow_control/flow_control.py | 68 + icolos/core/flow_control/iterator.py | 223 +++ icolos/core/job_control/__init__.py | 0 icolos/core/job_control/job_control.py | 89 + icolos/core/step_utils/__init__.py | 0 icolos/core/step_utils/input_merger.py | 114 ++ icolos/core/step_utils/input_preparator.py | 535 ++++++ icolos/core/step_utils/rdkit_utils.py | 10 + icolos/core/step_utils/retry.py | 42 + .../core/step_utils/run_variables_resolver.py | 67 + icolos/core/step_utils/sdconvert_util.py | 68 + icolos/core/step_utils/step_writeout.py | 507 ++++++ icolos/core/step_utils/structcat_util.py | 68 + icolos/core/step_utils/structconvert.py | 69 + icolos/core/steps_utils.py | 22 + icolos/core/workflow_steps/__init__.py | 0 
.../workflow_steps/autodockvina/__init__.py | 0 .../workflow_steps/autodockvina/docking.py | 324 ++++ .../autodockvina/target_preparation.py | 137 ++ .../workflow_steps/calculation/__init__.py | 0 .../core/workflow_steps/calculation/base.py | 52 + .../calculation/boltzmann_weighting.py | 98 ++ .../workflow_steps/calculation/clustering.py | 140 ++ .../core/workflow_steps/calculation/cosmo.py | 311 ++++ .../calculation/electrostatics/__init__.py | 0 .../calculation/electrostatics/cresset_ec.py | 108 ++ .../calculation/electrostatics/esp_sim.py | 152 ++ .../calculation/feature_counter.py | 64 + .../workflow_steps/calculation/panther.py | 152 ++ .../workflow_steps/calculation/rms_filter.py | 97 ++ .../core/workflow_steps/calculation/rmsd.py | 47 + .../core/workflow_steps/calculation/shaep.py | 77 + .../workflow_steps/calculation/turbomole.py | 440 +++++ .../cavity_explorer/__init__.py | 0 .../workflow_steps/cavity_explorer/base.py | 69 + .../cavity_explorer/mdpocket.py | 306 ++++ .../core/workflow_steps/confgen/__init__.py | 0 icolos/core/workflow_steps/confgen/base.py | 7 + icolos/core/workflow_steps/confgen/crest.py | 121 ++ icolos/core/workflow_steps/confgen/omega.py | 111 ++ icolos/core/workflow_steps/confgen/xtb.py | 170 ++ .../core/workflow_steps/gromacs/__init__.py | 10 + icolos/core/workflow_steps/gromacs/base.py | 195 +++ icolos/core/workflow_steps/gromacs/cluster.py | 81 + .../workflow_steps/gromacs/clusters_ts.py | 88 + icolos/core/workflow_steps/gromacs/do_dssp.py | 57 + .../core/workflow_steps/gromacs/editconf.py | 57 + icolos/core/workflow_steps/gromacs/genion.py | 69 + icolos/core/workflow_steps/gromacs/grompp.py | 125 ++ icolos/core/workflow_steps/gromacs/mdrun.py | 67 + icolos/core/workflow_steps/gromacs/mmpbsa.py | 161 ++ icolos/core/workflow_steps/gromacs/pdb2gmx.py | 455 ++++++ icolos/core/workflow_steps/gromacs/rsmd.py | 59 + icolos/core/workflow_steps/gromacs/solvate.py | 46 + icolos/core/workflow_steps/gromacs/trajcat.py | 54 + icolos/core/workflow_steps/gromacs/trjconv.py | 50 + icolos/core/workflow_steps/io/__init__.py | 0 icolos/core/workflow_steps/io/base.py | 10 + .../workflow_steps/io/data_manipulation.py | 248 +++ icolos/core/workflow_steps/io/embedder.py | 136 ++ .../workflow_steps/io/initialize_compound.py | 20 + icolos/core/workflow_steps/pmx/__init__.py | 14 + icolos/core/workflow_steps/pmx/abfe.py | 149 ++ .../workflow_steps/pmx/assemble_systems.py | 53 + icolos/core/workflow_steps/pmx/atomMapping.py | 86 + icolos/core/workflow_steps/pmx/base.py | 255 +++ .../core/workflow_steps/pmx/box_water_ions.py | 58 + icolos/core/workflow_steps/pmx/doublebox.py | 33 + icolos/core/workflow_steps/pmx/genlib.py | 68 + icolos/core/workflow_steps/pmx/gentop.py | 46 + .../core/workflow_steps/pmx/ligandHybrid.py | 121 ++ icolos/core/workflow_steps/pmx/mutate.py | 67 + .../workflow_steps/pmx/prepare_simulations.py | 51 + .../workflow_steps/pmx/prepare_transitions.py | 48 + .../core/workflow_steps/pmx/run_analysis.py | 47 + .../workflow_steps/pmx/run_simulations.py | 58 + .../core/workflow_steps/pmx/setup_workpath.py | 192 +++ .../workflow_steps/prediction/__init__.py | 0 .../prediction/active_learning.py | 267 +++ .../prediction/model_building.py | 269 ++++ .../workflow_steps/prediction/predictor.py | 86 + .../workflow_steps/schrodinger/__init__.py | 9 + .../core/workflow_steps/schrodinger/base.py | 366 +++++ .../schrodinger/desmond_exec.py | 114 ++ .../schrodinger/desmond_preprocessor.py | 75 + .../schrodinger/fep_analysis.py | 27 + .../workflow_steps/schrodinger/fep_base.py | 
211 +++ .../schrodinger/fep_plus_execution.py | 192 +++ .../schrodinger/fep_plus_setup.py | 303 ++++ .../core/workflow_steps/schrodinger/glide.py | 635 ++++++++ .../workflow_steps/schrodinger/ligprep.py | 322 ++++ .../workflow_steps/schrodinger/macromodel.py | 160 ++ .../workflow_steps/schrodinger/prepwizard.py | 90 ++ .../core/workflow_steps/schrodinger/prime.py | 239 +++ icolos/core/workflow_steps/step.py | 494 ++++++ .../structure_prediction/__init__.py | 0 .../structure_prediction/disicl.py | 0 .../structure_prediction/dssp.py | 56 + .../structure_prediction/pdb_fixer.py | 66 + .../structure_prediction/peptide_embedder.py | 36 + .../structure_prediction/rosetta_abinitio.py | 15 + icolos/loggers/__init__.py | 0 icolos/loggers/agentlogger.py | 12 + icolos/loggers/base_logger.py | 27 + icolos/loggers/blank_logger.py | 14 + icolos/loggers/entrypoint_logger.py | 12 + icolos/loggers/iologger.py | 12 + icolos/loggers/logger_utils.py | 4 + icolos/loggers/steplogger.py | 12 + icolos/scripts/__init__.py | 0 icolos/scripts/cli.py | 103 ++ icolos/utils/__init__.py | 0 icolos/utils/constants.py | 2 + .../utils/entry_point_functions/__init__.py | 0 .../logging_helper_functions.py | 29 + .../parsing_functions.py | 75 + icolos/utils/enums/__init__.py | 0 icolos/utils/enums/composite_agents_enums.py | 59 + icolos/utils/enums/compound_enums.py | 56 + icolos/utils/enums/entry_points.py | 15 + icolos/utils/enums/execution_enums.py | 13 + icolos/utils/enums/flow_control_enums.py | 15 + icolos/utils/enums/general_utils_enums.py | 15 + icolos/utils/enums/input_enums.py | 19 + icolos/utils/enums/logging_enums.py | 31 + icolos/utils/enums/parallelization.py | 18 + icolos/utils/enums/program_parameters.py | 1428 +++++++++++++++++ icolos/utils/enums/step_enums.py | 922 +++++++++++ .../utils/enums/step_initialization_enum.py | 110 ++ icolos/utils/enums/write_out_enums.py | 47 + icolos/utils/execute_external/__init__.py | 0 icolos/utils/execute_external/autodockvina.py | 41 + .../utils/execute_external/batch_executor.py | 145 ++ .../execute_external/cresset_executor.py | 40 + icolos/utils/execute_external/crest.py | 41 + icolos/utils/execute_external/execute.py | 101 ++ icolos/utils/execute_external/fep_plus.py | 48 + icolos/utils/execute_external/glide.py | 44 + icolos/utils/execute_external/gromacs.py | 53 + icolos/utils/execute_external/ifd_executor.py | 40 + .../execute_external/license_token_guard.py | 127 ++ icolos/utils/execute_external/ligprep.py | 47 + icolos/utils/execute_external/macromodel.py | 46 + icolos/utils/execute_external/omega.py | 42 + icolos/utils/execute_external/openbabel.py | 43 + icolos/utils/execute_external/pmx.py | 54 + icolos/utils/execute_external/prime.py | 43 + icolos/utils/execute_external/schrodinger.py | 58 + icolos/utils/execute_external/sdconvert.py | 46 + icolos/utils/execute_external/structcat.py | 46 + icolos/utils/execute_external/turbomole.py | 62 + icolos/utils/execute_external/xtb.py | 40 + icolos/utils/general/__init__.py | 0 .../utils/general/arparse_bool_extension.py | 14 + icolos/utils/general/convenience_functions.py | 60 + icolos/utils/general/files_paths.py | 105 ++ icolos/utils/general/icolos_exceptions.py | 25 + icolos/utils/general/molecules.py | 20 + icolos/utils/general/notifications.py | 0 icolos/utils/general/parallelization.py | 159 ++ icolos/utils/general/print_log.py | 14 + icolos/utils/general/progress_bar.py | 7 + icolos/utils/general/strings.py | 8 + icolos/utils/smiles.py | 145 ++ icolos_workflow.py | 65 + integration_tests.py | 5 + 
licences/espsim_licence.txt | 21 + pyproject.toml | 3 + sdf2smi.py | 99 ++ setup.py | 17 + tests/CREST/__init__.py | 1 + tests/CREST/test_CREST_confgen.py | 115 ++ tests/OMEGA/__init__.py | 1 + tests/OMEGA/test_OMEGA_confgen.py | 124 ++ tests/Turbomole/__init__.py | 1 + tests/Turbomole/test_Turbomole.py | 253 +++ tests/XTB/__init__.py | 1 + tests/XTB/test_XTB_confgen.py | 207 +++ tests/__init__.py | 0 tests/autodockvina/__init__.py | 2 + tests/autodockvina/test_adv_docking.py | 95 ++ tests/autodockvina/test_adv_target_prep.py | 90 ++ tests/boltzmann_weighting/__init__.py | 1 + .../test_boltzmann_weighting.py | 130 ++ tests/cavity_explorer/__init__.py | 1 + tests/cavity_explorer/test_md_pocket.py | 115 ++ tests/clustering/__init__.py | 1 + tests/clustering/test_clustering.py | 91 ++ tests/composite_agents/__init__.py | 1 + tests/composite_agents/test_workflow.py | 451 ++++++ tests/containers/__init__.py | 3 + tests/containers/test_compound.py | 113 ++ tests/containers/test_generic.py | 32 + tests/containers/test_perturbation_map.py | 44 + tests/cosmo/__init__.py | 1 + tests/cosmo/test_Cosmo.py | 174 ++ tests/esp_sim/__init__.py | 1 + tests/esp_sim/test_esp_sim.py | 85 + tests/feature_counter/__init__.py | 1 + tests/feature_counter/test_feature_counter.py | 102 ++ tests/flow_control/__init__.py | 1 + tests/flow_control/test_iterator.py | 197 +++ tests/gromacs/__init__.py | 14 + tests/gromacs/test_cluster.py | 70 + tests/gromacs/test_cluster_ts.py | 55 + tests/gromacs/test_do_dssp.py | 54 + tests/gromacs/test_editconf.py | 54 + tests/gromacs/test_genion.py | 59 + tests/gromacs/test_grompp.py | 72 + tests/gromacs/test_mdrun.py | 70 + tests/gromacs/test_mmpbsa.py | 142 ++ tests/gromacs/test_pdb2gmx.py | 91 ++ tests/gromacs/test_removelig.py | 1 + tests/gromacs/test_rmsd.py | 56 + tests/gromacs/test_solvate.py | 60 + tests/gromacs/test_trjcat.py | 56 + tests/gromacs/test_trjconv.py | 53 + tests/integration_tests/__init__.py | 5 + tests/integration_tests/test_docking.py | 239 +++ tests/integration_tests/test_fep_plus.py | 224 +++ tests/integration_tests/test_gromacs.py | 1076 +++++++++++++ tests/integration_tests/test_rmsd_iter.py | 344 ++++ tests/io/__init__.py | 3 + tests/io/test_data_manipulation.py | 325 ++++ tests/io/test_embedder.py | 135 ++ tests/io/test_initialize_compound.py | 240 +++ tests/panther/__init__.py | 1 + tests/panther/test_panther.py | 48 + tests/pmx/__init__.py | 11 + tests/pmx/test_abfe.py | 62 + tests/pmx/test_analyse.py | 65 + tests/pmx/test_assemble_systems.py | 77 + tests/pmx/test_atomMapping.py | 67 + tests/pmx/test_box_water_ions.py | 70 + tests/pmx/test_doublebox.py | 20 + tests/pmx/test_genlib.py | 20 + tests/pmx/test_gentop.py | 20 + tests/pmx/test_ligandHybrid.py | 70 + tests/pmx/test_mutate.py | 20 + tests/pmx/test_prepare_simulations.py | 73 + tests/pmx/test_prepare_transitions.py | 95 ++ tests/pmx/test_run_simulations.py | 113 ++ tests/pmx/test_setup_workpath.py | 80 + tests/prediction/__init__.py | 3 + tests/prediction/test_active_learning.py | 100 ++ tests/prediction/test_model_building.py | 91 ++ tests/prediction/test_predictor.py | 68 + tests/rms_filter/__init__.py | 1 + tests/rms_filter/test_rmsfilter.py | 203 +++ tests/rmsd/__init__.py | 1 + tests/rmsd/test_rmsd.py | 112 ++ tests/schrodinger/__init__.py | 10 + tests/schrodinger/test_desmond_production.py | 48 + tests/schrodinger/test_desmond_setup.py | 48 + tests/schrodinger/test_fep_analysis.py | 105 ++ tests/schrodinger/test_fep_plus_execution.py | 124 ++ tests/schrodinger/test_fep_plus_setup.py | 92 ++ 
tests/schrodinger/test_glide.py | 511 ++++++ tests/schrodinger/test_ligprep.py | 220 +++ tests/schrodinger/test_macromodel.py | 93 ++ tests/schrodinger/test_prepwizard.py | 118 ++ tests/schrodinger/test_prime.py | 195 +++ tests/shaep/__init__.py | 1 + tests/shaep/test_shaep.py | 72 + tests/step_utils/__init__.py | 4 + tests/step_utils/test_input_merger.py | 262 +++ tests/step_utils/test_input_preparator.py | 101 ++ .../step_utils/test_run_variables_resolver.py | 164 ++ tests/step_utils/test_structconvert.py | 32 + tests/step_utils/test_writeout.py | 335 ++++ tests/structure_prediction/__init__.py | 5 + tests/structure_prediction/test_dssp.py | 52 + tests/structure_prediction/test_pdb_fixer.py | 63 + .../test_peptide_embedder.py | 51 + tests/tests_paths.py | 396 +++++ unit_tests.py | 29 + 336 files changed, 35659 insertions(+) create mode 100644 .gitignore create mode 100644 CHANGELOG.md create mode 100644 DOCKERFILE create mode 100644 LICENCE create mode 100644 README.md create mode 100644 environment_min.yml create mode 100644 examples/hpc_script/NIBR_reinvent.sh create mode 100644 examples/hpc_script/SLURM_script.sh create mode 100644 examples/workflow/desmond_md.json create mode 100644 examples/workflow/docking/active_learning_docking.json create mode 100644 examples/workflow/docking/adv_docking.json create mode 100644 examples/workflow/docking/adv_target_preparation.json create mode 100644 examples/workflow/docking/glide_docking.json create mode 100644 examples/workflow/fep_plus/fep_plus_aws.json create mode 100644 examples/workflow/gromacs/gromacs_ensemble_mmgbsa.json create mode 100644 examples/workflow/gromacs/gromacs_fpocket.json create mode 100644 examples/workflow/gromacs/gromacs_md.json create mode 100644 examples/workflow/gromacs/gromacs_mmgbsa.json create mode 100644 examples/workflow/input_file_types.json create mode 100644 examples/workflow/pmx/pmx_rbfe.json create mode 100644 examples/workflow/qm/ePSA_permeability.json create mode 100644 examples/workflow/qm/full_rescoss.json create mode 100644 examples/workflow/reinvent/feature_counter.json create mode 100644 examples/workflow/reinvent/nibr_local_reinvent.json create mode 100644 examples/workflow/rescoring/negative_image_panther.json create mode 100644 examples/workflow/rescoring/nibr_local.json create mode 100644 examples/workflow/rescoring/rmsd_rescoring.json create mode 100644 executor.py create mode 100644 external_documentation/REINVENT_input.json create mode 100644 external_documentation/REINVENT_result.json create mode 100644 external_documentation/fep_mapper.txt create mode 100644 external_documentation/fep_plus.txt create mode 100644 external_documentation/prime_arguments.txt create mode 100644 icolos/__init__.py create mode 100644 icolos/config/amber/default_mmpbsa.in create mode 100644 icolos/config/cosmo/default_cosmo.config create mode 100644 icolos/config/desmond/config.msj create mode 100644 icolos/config/desmond/prod.cfg create mode 100644 icolos/config/desmond/production.msj create mode 100644 icolos/config/logging/debug.json create mode 100644 icolos/config/logging/default.json create mode 100644 icolos/config/logging/tutorial.json create mode 100644 icolos/config/logging/verbose.json create mode 100644 icolos/config/panther/default_panther.in create mode 100644 icolos/config/unit_tests_config/cosmo/cosmo.config create mode 100644 icolos/config/unit_tests_config/cosmo/example.cosmo.config create mode 100644 icolos/config/unit_tests_config/example.config.json create mode 100644 
icolos/config_containers/__init__.py create mode 100644 icolos/config_containers/container.py create mode 100644 icolos/config_containers/workflow_container.py create mode 100644 icolos/core/__init__.py create mode 100644 icolos/core/composite_agents/__init__.py create mode 100644 icolos/core/composite_agents/base_agent.py create mode 100644 icolos/core/composite_agents/scheduler.py create mode 100644 icolos/core/composite_agents/workflow.py create mode 100644 icolos/core/containers/__init__.py create mode 100644 icolos/core/containers/compound.py create mode 100644 icolos/core/containers/generic.py create mode 100644 icolos/core/containers/perturbation_map.py create mode 100644 icolos/core/flow_control/__init__.py create mode 100644 icolos/core/flow_control/flow_control.py create mode 100644 icolos/core/flow_control/iterator.py create mode 100644 icolos/core/job_control/__init__.py create mode 100644 icolos/core/job_control/job_control.py create mode 100644 icolos/core/step_utils/__init__.py create mode 100644 icolos/core/step_utils/input_merger.py create mode 100644 icolos/core/step_utils/input_preparator.py create mode 100644 icolos/core/step_utils/rdkit_utils.py create mode 100644 icolos/core/step_utils/retry.py create mode 100644 icolos/core/step_utils/run_variables_resolver.py create mode 100644 icolos/core/step_utils/sdconvert_util.py create mode 100644 icolos/core/step_utils/step_writeout.py create mode 100644 icolos/core/step_utils/structcat_util.py create mode 100644 icolos/core/step_utils/structconvert.py create mode 100644 icolos/core/steps_utils.py create mode 100644 icolos/core/workflow_steps/__init__.py create mode 100644 icolos/core/workflow_steps/autodockvina/__init__.py create mode 100644 icolos/core/workflow_steps/autodockvina/docking.py create mode 100644 icolos/core/workflow_steps/autodockvina/target_preparation.py create mode 100644 icolos/core/workflow_steps/calculation/__init__.py create mode 100644 icolos/core/workflow_steps/calculation/base.py create mode 100644 icolos/core/workflow_steps/calculation/boltzmann_weighting.py create mode 100644 icolos/core/workflow_steps/calculation/clustering.py create mode 100644 icolos/core/workflow_steps/calculation/cosmo.py create mode 100644 icolos/core/workflow_steps/calculation/electrostatics/__init__.py create mode 100644 icolos/core/workflow_steps/calculation/electrostatics/cresset_ec.py create mode 100644 icolos/core/workflow_steps/calculation/electrostatics/esp_sim.py create mode 100644 icolos/core/workflow_steps/calculation/feature_counter.py create mode 100644 icolos/core/workflow_steps/calculation/panther.py create mode 100644 icolos/core/workflow_steps/calculation/rms_filter.py create mode 100644 icolos/core/workflow_steps/calculation/rmsd.py create mode 100644 icolos/core/workflow_steps/calculation/shaep.py create mode 100644 icolos/core/workflow_steps/calculation/turbomole.py create mode 100644 icolos/core/workflow_steps/cavity_explorer/__init__.py create mode 100644 icolos/core/workflow_steps/cavity_explorer/base.py create mode 100644 icolos/core/workflow_steps/cavity_explorer/mdpocket.py create mode 100644 icolos/core/workflow_steps/confgen/__init__.py create mode 100644 icolos/core/workflow_steps/confgen/base.py create mode 100644 icolos/core/workflow_steps/confgen/crest.py create mode 100644 icolos/core/workflow_steps/confgen/omega.py create mode 100644 icolos/core/workflow_steps/confgen/xtb.py create mode 100644 icolos/core/workflow_steps/gromacs/__init__.py create mode 100644 
icolos/core/workflow_steps/gromacs/base.py create mode 100644 icolos/core/workflow_steps/gromacs/cluster.py create mode 100644 icolos/core/workflow_steps/gromacs/clusters_ts.py create mode 100644 icolos/core/workflow_steps/gromacs/do_dssp.py create mode 100644 icolos/core/workflow_steps/gromacs/editconf.py create mode 100644 icolos/core/workflow_steps/gromacs/genion.py create mode 100644 icolos/core/workflow_steps/gromacs/grompp.py create mode 100644 icolos/core/workflow_steps/gromacs/mdrun.py create mode 100644 icolos/core/workflow_steps/gromacs/mmpbsa.py create mode 100644 icolos/core/workflow_steps/gromacs/pdb2gmx.py create mode 100644 icolos/core/workflow_steps/gromacs/rsmd.py create mode 100644 icolos/core/workflow_steps/gromacs/solvate.py create mode 100644 icolos/core/workflow_steps/gromacs/trajcat.py create mode 100644 icolos/core/workflow_steps/gromacs/trjconv.py create mode 100644 icolos/core/workflow_steps/io/__init__.py create mode 100644 icolos/core/workflow_steps/io/base.py create mode 100644 icolos/core/workflow_steps/io/data_manipulation.py create mode 100644 icolos/core/workflow_steps/io/embedder.py create mode 100644 icolos/core/workflow_steps/io/initialize_compound.py create mode 100644 icolos/core/workflow_steps/pmx/__init__.py create mode 100644 icolos/core/workflow_steps/pmx/abfe.py create mode 100644 icolos/core/workflow_steps/pmx/assemble_systems.py create mode 100644 icolos/core/workflow_steps/pmx/atomMapping.py create mode 100644 icolos/core/workflow_steps/pmx/base.py create mode 100644 icolos/core/workflow_steps/pmx/box_water_ions.py create mode 100644 icolos/core/workflow_steps/pmx/doublebox.py create mode 100644 icolos/core/workflow_steps/pmx/genlib.py create mode 100644 icolos/core/workflow_steps/pmx/gentop.py create mode 100644 icolos/core/workflow_steps/pmx/ligandHybrid.py create mode 100644 icolos/core/workflow_steps/pmx/mutate.py create mode 100644 icolos/core/workflow_steps/pmx/prepare_simulations.py create mode 100644 icolos/core/workflow_steps/pmx/prepare_transitions.py create mode 100644 icolos/core/workflow_steps/pmx/run_analysis.py create mode 100644 icolos/core/workflow_steps/pmx/run_simulations.py create mode 100644 icolos/core/workflow_steps/pmx/setup_workpath.py create mode 100644 icolos/core/workflow_steps/prediction/__init__.py create mode 100644 icolos/core/workflow_steps/prediction/active_learning.py create mode 100644 icolos/core/workflow_steps/prediction/model_building.py create mode 100644 icolos/core/workflow_steps/prediction/predictor.py create mode 100644 icolos/core/workflow_steps/schrodinger/__init__.py create mode 100644 icolos/core/workflow_steps/schrodinger/base.py create mode 100644 icolos/core/workflow_steps/schrodinger/desmond_exec.py create mode 100644 icolos/core/workflow_steps/schrodinger/desmond_preprocessor.py create mode 100644 icolos/core/workflow_steps/schrodinger/fep_analysis.py create mode 100644 icolos/core/workflow_steps/schrodinger/fep_base.py create mode 100644 icolos/core/workflow_steps/schrodinger/fep_plus_execution.py create mode 100644 icolos/core/workflow_steps/schrodinger/fep_plus_setup.py create mode 100644 icolos/core/workflow_steps/schrodinger/glide.py create mode 100644 icolos/core/workflow_steps/schrodinger/ligprep.py create mode 100644 icolos/core/workflow_steps/schrodinger/macromodel.py create mode 100644 icolos/core/workflow_steps/schrodinger/prepwizard.py create mode 100644 icolos/core/workflow_steps/schrodinger/prime.py create mode 100644 icolos/core/workflow_steps/step.py create mode 100644 
icolos/core/workflow_steps/structure_prediction/__init__.py create mode 100644 icolos/core/workflow_steps/structure_prediction/disicl.py create mode 100644 icolos/core/workflow_steps/structure_prediction/dssp.py create mode 100644 icolos/core/workflow_steps/structure_prediction/pdb_fixer.py create mode 100644 icolos/core/workflow_steps/structure_prediction/peptide_embedder.py create mode 100644 icolos/core/workflow_steps/structure_prediction/rosetta_abinitio.py create mode 100644 icolos/loggers/__init__.py create mode 100644 icolos/loggers/agentlogger.py create mode 100644 icolos/loggers/base_logger.py create mode 100644 icolos/loggers/blank_logger.py create mode 100644 icolos/loggers/entrypoint_logger.py create mode 100644 icolos/loggers/iologger.py create mode 100644 icolos/loggers/logger_utils.py create mode 100644 icolos/loggers/steplogger.py create mode 100644 icolos/scripts/__init__.py create mode 100644 icolos/scripts/cli.py create mode 100644 icolos/utils/__init__.py create mode 100644 icolos/utils/constants.py create mode 100644 icolos/utils/entry_point_functions/__init__.py create mode 100644 icolos/utils/entry_point_functions/logging_helper_functions.py create mode 100644 icolos/utils/entry_point_functions/parsing_functions.py create mode 100644 icolos/utils/enums/__init__.py create mode 100644 icolos/utils/enums/composite_agents_enums.py create mode 100644 icolos/utils/enums/compound_enums.py create mode 100644 icolos/utils/enums/entry_points.py create mode 100644 icolos/utils/enums/execution_enums.py create mode 100644 icolos/utils/enums/flow_control_enums.py create mode 100644 icolos/utils/enums/general_utils_enums.py create mode 100644 icolos/utils/enums/input_enums.py create mode 100644 icolos/utils/enums/logging_enums.py create mode 100644 icolos/utils/enums/parallelization.py create mode 100644 icolos/utils/enums/program_parameters.py create mode 100644 icolos/utils/enums/step_enums.py create mode 100644 icolos/utils/enums/step_initialization_enum.py create mode 100644 icolos/utils/enums/write_out_enums.py create mode 100644 icolos/utils/execute_external/__init__.py create mode 100644 icolos/utils/execute_external/autodockvina.py create mode 100644 icolos/utils/execute_external/batch_executor.py create mode 100644 icolos/utils/execute_external/cresset_executor.py create mode 100644 icolos/utils/execute_external/crest.py create mode 100644 icolos/utils/execute_external/execute.py create mode 100644 icolos/utils/execute_external/fep_plus.py create mode 100644 icolos/utils/execute_external/glide.py create mode 100644 icolos/utils/execute_external/gromacs.py create mode 100644 icolos/utils/execute_external/ifd_executor.py create mode 100644 icolos/utils/execute_external/license_token_guard.py create mode 100644 icolos/utils/execute_external/ligprep.py create mode 100644 icolos/utils/execute_external/macromodel.py create mode 100644 icolos/utils/execute_external/omega.py create mode 100644 icolos/utils/execute_external/openbabel.py create mode 100644 icolos/utils/execute_external/pmx.py create mode 100644 icolos/utils/execute_external/prime.py create mode 100644 icolos/utils/execute_external/schrodinger.py create mode 100644 icolos/utils/execute_external/sdconvert.py create mode 100644 icolos/utils/execute_external/structcat.py create mode 100644 icolos/utils/execute_external/turbomole.py create mode 100644 icolos/utils/execute_external/xtb.py create mode 100644 icolos/utils/general/__init__.py create mode 100644 icolos/utils/general/arparse_bool_extension.py create mode 
100644 icolos/utils/general/convenience_functions.py create mode 100644 icolos/utils/general/files_paths.py create mode 100644 icolos/utils/general/icolos_exceptions.py create mode 100644 icolos/utils/general/molecules.py create mode 100644 icolos/utils/general/notifications.py create mode 100644 icolos/utils/general/parallelization.py create mode 100644 icolos/utils/general/print_log.py create mode 100644 icolos/utils/general/progress_bar.py create mode 100644 icolos/utils/general/strings.py create mode 100644 icolos/utils/smiles.py create mode 100644 icolos_workflow.py create mode 100644 integration_tests.py create mode 100644 licences/espsim_licence.txt create mode 100644 pyproject.toml create mode 100644 sdf2smi.py create mode 100644 setup.py create mode 100644 tests/CREST/__init__.py create mode 100644 tests/CREST/test_CREST_confgen.py create mode 100644 tests/OMEGA/__init__.py create mode 100644 tests/OMEGA/test_OMEGA_confgen.py create mode 100644 tests/Turbomole/__init__.py create mode 100644 tests/Turbomole/test_Turbomole.py create mode 100644 tests/XTB/__init__.py create mode 100644 tests/XTB/test_XTB_confgen.py create mode 100644 tests/__init__.py create mode 100644 tests/autodockvina/__init__.py create mode 100644 tests/autodockvina/test_adv_docking.py create mode 100644 tests/autodockvina/test_adv_target_prep.py create mode 100644 tests/boltzmann_weighting/__init__.py create mode 100644 tests/boltzmann_weighting/test_boltzmann_weighting.py create mode 100644 tests/cavity_explorer/__init__.py create mode 100644 tests/cavity_explorer/test_md_pocket.py create mode 100644 tests/clustering/__init__.py create mode 100644 tests/clustering/test_clustering.py create mode 100644 tests/composite_agents/__init__.py create mode 100644 tests/composite_agents/test_workflow.py create mode 100644 tests/containers/__init__.py create mode 100644 tests/containers/test_compound.py create mode 100644 tests/containers/test_generic.py create mode 100644 tests/containers/test_perturbation_map.py create mode 100644 tests/cosmo/__init__.py create mode 100644 tests/cosmo/test_Cosmo.py create mode 100644 tests/esp_sim/__init__.py create mode 100644 tests/esp_sim/test_esp_sim.py create mode 100644 tests/feature_counter/__init__.py create mode 100644 tests/feature_counter/test_feature_counter.py create mode 100644 tests/flow_control/__init__.py create mode 100644 tests/flow_control/test_iterator.py create mode 100644 tests/gromacs/__init__.py create mode 100644 tests/gromacs/test_cluster.py create mode 100644 tests/gromacs/test_cluster_ts.py create mode 100644 tests/gromacs/test_do_dssp.py create mode 100644 tests/gromacs/test_editconf.py create mode 100644 tests/gromacs/test_genion.py create mode 100644 tests/gromacs/test_grompp.py create mode 100644 tests/gromacs/test_mdrun.py create mode 100644 tests/gromacs/test_mmpbsa.py create mode 100644 tests/gromacs/test_pdb2gmx.py create mode 100644 tests/gromacs/test_removelig.py create mode 100644 tests/gromacs/test_rmsd.py create mode 100644 tests/gromacs/test_solvate.py create mode 100644 tests/gromacs/test_trjcat.py create mode 100644 tests/gromacs/test_trjconv.py create mode 100644 tests/integration_tests/__init__.py create mode 100644 tests/integration_tests/test_docking.py create mode 100644 tests/integration_tests/test_fep_plus.py create mode 100644 tests/integration_tests/test_gromacs.py create mode 100644 tests/integration_tests/test_rmsd_iter.py create mode 100644 tests/io/__init__.py create mode 100644 tests/io/test_data_manipulation.py create mode 
100644 tests/io/test_embedder.py create mode 100644 tests/io/test_initialize_compound.py create mode 100644 tests/panther/__init__.py create mode 100644 tests/panther/test_panther.py create mode 100644 tests/pmx/__init__.py create mode 100644 tests/pmx/test_abfe.py create mode 100644 tests/pmx/test_analyse.py create mode 100644 tests/pmx/test_assemble_systems.py create mode 100644 tests/pmx/test_atomMapping.py create mode 100644 tests/pmx/test_box_water_ions.py create mode 100644 tests/pmx/test_doublebox.py create mode 100644 tests/pmx/test_genlib.py create mode 100644 tests/pmx/test_gentop.py create mode 100644 tests/pmx/test_ligandHybrid.py create mode 100644 tests/pmx/test_mutate.py create mode 100644 tests/pmx/test_prepare_simulations.py create mode 100644 tests/pmx/test_prepare_transitions.py create mode 100644 tests/pmx/test_run_simulations.py create mode 100644 tests/pmx/test_setup_workpath.py create mode 100644 tests/prediction/__init__.py create mode 100644 tests/prediction/test_active_learning.py create mode 100644 tests/prediction/test_model_building.py create mode 100644 tests/prediction/test_predictor.py create mode 100644 tests/rms_filter/__init__.py create mode 100644 tests/rms_filter/test_rmsfilter.py create mode 100644 tests/rmsd/__init__.py create mode 100644 tests/rmsd/test_rmsd.py create mode 100644 tests/schrodinger/__init__.py create mode 100644 tests/schrodinger/test_desmond_production.py create mode 100644 tests/schrodinger/test_desmond_setup.py create mode 100644 tests/schrodinger/test_fep_analysis.py create mode 100644 tests/schrodinger/test_fep_plus_execution.py create mode 100644 tests/schrodinger/test_fep_plus_setup.py create mode 100644 tests/schrodinger/test_glide.py create mode 100644 tests/schrodinger/test_ligprep.py create mode 100644 tests/schrodinger/test_macromodel.py create mode 100644 tests/schrodinger/test_prepwizard.py create mode 100644 tests/schrodinger/test_prime.py create mode 100644 tests/shaep/__init__.py create mode 100644 tests/shaep/test_shaep.py create mode 100644 tests/step_utils/__init__.py create mode 100644 tests/step_utils/test_input_merger.py create mode 100644 tests/step_utils/test_input_preparator.py create mode 100644 tests/step_utils/test_run_variables_resolver.py create mode 100644 tests/step_utils/test_structconvert.py create mode 100644 tests/step_utils/test_writeout.py create mode 100644 tests/structure_prediction/__init__.py create mode 100644 tests/structure_prediction/test_dssp.py create mode 100644 tests/structure_prediction/test_pdb_fixer.py create mode 100644 tests/structure_prediction/test_peptide_embedder.py create mode 100644 tests/tests_paths.py create mode 100644 unit_tests.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3c4e837 --- /dev/null +++ b/.gitignore @@ -0,0 +1,16 @@ +__pycache__ +*.pyc +package.json +.vscode +tags +.idea + +timer.dat +git-commands.txt +icolos/config/unit_tests_config/config.json + +tests/junk +*.log +.directory +*_cache +*.ipynb_checkpoints diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..c1f3f38 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,68 @@ +# Update log + +### Version 1.4.0 | 2022-01-19 +#### Features +- Added support for non-equilibrium relative binding free energy calculation with PMX. +- Added Glide support for feeding in "in" files from Maestro directly. +- Added AutoDock Vina as docking backend. + +#### Internal +- Limited refactoring of support functions. 
+
+### Version 1.3.0 | 2021-11-18
+#### Features
+- Added Iterator mechanism for parallel step execution.
+- Pose rescoring by RMSD workflow.
+- MMGBSA workflow with GROMACS.
+
+#### Internal
+- Improved error logging from subprocesses.
+- Improvements to MDpocket workflows.
+- Refactored example workflows + added new examples.
+
+### Version 1.2.0 | 2021-09-15
+#### Features
+- Added MDpocket workflow for pocket identification.
+- Expanded scope of GROMACS workflow for improved ligand/cofactor parametrisation.
+- Improved FEP+ workflow map construction and analysis.
+- Performance optimisation for Turbomole and Prime.
+- Added PDBFixer step.
+- Added ensemble docking.
+
+#### Internal
+- Improved temporary file handling.
+
+### Version 1.1.0 | 2021-06-30
+#### Features
+- Added `Ligprep` workflow step.
+- Added `Glide` workflow step.
+- Added run-time global variables.
+- Added JSON input type (`REINVENT`-compatible).
+- Additional `GROMACS` binaries and automated ligand parametrisation.
+- Added support for Schrodinger's `FEP+` workflow.
+- Added support for `OptunaAZ` model building.
+
+#### Bug fixes
+- Fixed problems in tabular write-out (no compound names and sometimes lost column order).
+- Fixed bug in aggregation (`highest_is_best` parameter was not working properly).
+- Fixed instability with step write-out (occurred when no conformers were associated with a compound).
+- Fixed bug in the parallelization of `Ligprep`.
+
+#### Internal
+- Refactored structure for `Schrodinger` binaries.
+- Reworked the write-out functionality.
+- Reworked internal file handling.
+- Reworked generic data handling.
+
+### Version 1.0.0 | 2021-05-21
+#### Features
+- Basic functionality (data handling, backend wrapping).
+- Various steps implemented (`Turbomole`, `Cosmo`, `OMEGA`, `GROMACS`, ...).
+
+#### Bug fixes
+- Fixed issues with `Turbomole` execution.
+- Enforced GROMACS execution in `tmp_dir`.
+
+#### Internal
+- Adapted `pydantic` interface.
+
diff --git a/DOCKERFILE b/DOCKERFILE
new file mode 100644
index 0000000..3971749
--- /dev/null
+++ b/DOCKERFILE
@@ -0,0 +1,7 @@
+#syntax=docker/dockerfile:1
+
+FROM continuumio/miniconda3
+
+COPY environment_min.yml .
+
+RUN conda env create -f environment_min.yml
diff --git a/LICENCE b/LICENCE new file mode 100644 index 0000000..1f120b7 --- /dev/null +++ b/LICENCE @@ -0,0 +1,169 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + 1. Definitions. + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files.
+ "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + END OF TERMS AND CONDITIONS + APPENDIX: How to apply the Apache License to your work. + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + Copyright 2022 Molecular AI, AstraZeneca + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3fb6af0
--- /dev/null
+++ b/README.md
@@ -0,0 +1,75 @@
+[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/python/black)
+
+
+# `Icolos`: Workflow manager
+
+The `Icolos` tool is a workflow manager that aims to separate execution logic from the actual implementation as much as
+possible. Workflows are specified in `JSON` files (see folder `examples`), linking steps together. A diverse set of
+tools and internal steps is currently wrapped, including QM and MD software.
+
+
+## Introduction
+`Icolos` provides a single, unified interface to a host of software for common computational chemistry calculations, with built-in parallelization
+and straightforward extensibility to add additional functionality.
+It was principally developed to handle structural calculations for `REINVENT` jobs; however, various workflows have also been used as stand-alone pipelines.
+
+Workflows are constructed from elementary 'steps': individual blocks that specify a sequential list of operations (normally corresponding to a single program being executed).
+Command-line options are controlled through the step settings, while options controlling other aspects of the step's behaviour go in the `additional` block
+(a trimmed skeleton is sketched at the end of this README).
+
+For many use cases, one of the template workflows might suit your needs as-is, or require only a few tweaks to do what you want. The JSONs in the `examples` folder are updated less regularly
+with new features and are mostly used for integration testing.
+
+## Initial configuration
+You are welcome to clone the repository and use a local version; in particular, if you would like to experiment with the code base and/or contribute features, please get
+in contact with us.
+
+## Installation
+After cloning, first install the `icolosprod` `conda` environment:
+```
+conda env create -f environment_min.yml
+```
+
+## Execution
+Once a `JSON` is specified, the workflow can be executed like so:
+
+```
+conda activate icolosprod
+python executor.py -conf workflow.json
+```
+
+## `SLURM` Execution
+Once specified, a workflow can be called like this in a `bash` script:
+
+```
+#!/bin/bash -l
+#SBATCH -N 1
+#SBATCH -t 0-02:59:00
+#SBATCH -p core
+#SBATCH --ntasks-per-node=5
+#SBATCH --mem-per-cpu=2G
+
+source //miniconda3/bin/activate //miniconda3/envs/icolosprod
+python //Icolos/executor.py -conf workflow.json
+```
+For GROMACS workflows requiring the GPU partition, you will need to adapt the header accordingly, e.g. like so:
+
+```
+#!/bin/bash
+#SBATCH -J gmx_cco1_fold_microsecond
+#SBATCH -o MygpuJob_out_%j.txt
+#SBATCH -e MygpuJob_err_%j.txt
+#SBATCH --nodes=1
+#SBATCH --ntasks=4
+#SBATCH --cpus-per-task=4
+#SBATCH --gres=gpu:4
+#SBATCH --gres-flags=enforce-binding
+#SBATCH --mem-per-cpu=4g
+#SBATCH -p gpu
+#SBATCH --time=370:00:00
+
+```
+
+## Developers
+- Christian Margreitter
+- J. Harry Moore
+- Matthias R. Bauer
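For orientation, here is a trimmed skeleton of a workflow `JSON`, assembled from the example files under `examples/workflow`. The values shown are illustrative placeholders, and step-specific blocks (e.g. the Vina `search_space`) are omitted for brevity; see the full example files for runnable versions:

```json
{
  "workflow": {
    "header": {
      "workflow_id": "minimal_docking_example",
      "description": "Illustrative skeleton: embed one SMILES with Ligprep, then dock with AutoDock Vina.",
      "environment": { "export": [] },
      "global_variables": {
        "smiles": "aspirin:O=C(C)Oc1ccccc1C(=O)O",
        "receptor_path": "{entrypoint_dir}/receptor.pdbqt"
      }
    },
    "steps": [
      {
        "step_id": "Ligprep",
        "type": "ligprep",
        "execution": { "prefix_execution": "module load schrodinger/2020-4" },
        "settings": {
          "arguments": { "flags": ["-epik"], "parameters": { "-ph": 7.0 } },
          "additional": {}
        },
        "input": { "compounds": [{ "source": "{smiles}", "source_type": "string" }] }
      },
      {
        "step_id": "ADV",
        "type": "vina_docking",
        "execution": { "prefix_execution": "module load AutoDock_Vina" },
        "settings": {
          "arguments": { "flags": [], "parameters": {} },
          "additional": {
            "configuration": { "receptor_path": "{receptor_path}", "number_poses": 2 }
          }
        },
        "input": { "compounds": [{ "source": "Ligprep", "source_type": "step" }] },
        "writeout": [
          {
            "compounds": { "category": "conformers" },
            "destination": { "resource": "{entrypoint_dir}/docked.sdf", "type": "file", "format": "SDF" }
          }
        ]
      }
    ]
  }
}
```

Placeholders in curly braces (e.g. `{entrypoint_dir}`, `{receptor_path}`) are global variables: they can be declared in the workflow header or injected at run time via `executor.py`'s `--global_variables` flag, as done in the scripts under `examples/hpc_script`.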
diff --git a/environment_min.yml b/environment_min.yml new file mode 100644 index 0000000..8929b20 --- /dev/null +++ b/environment_min.yml @@ -0,0 +1,24 @@ +name: icolosprod +channels: + - psi4 + - conda-forge + - defaults + - rdkit +dependencies: + - biopython>=1.79 + - ipython + - pip + - python>=3.9 + - scikit-learn>=1.0.1 + - modal>=0.4 + - psi4>=1.4 + - pdbfixer + - pydantic>=1.8 + - pyvis + - requests + - openbabel>=3 + - rdkit>=2021.09.2 + - pip: + - black + - regex + - peptidebuilder>=1.1 diff --git a/examples/hpc_script/NIBR_reinvent.sh b/examples/hpc_script/NIBR_reinvent.sh new file mode 100644 index 0000000..a9be63e --- /dev/null +++ b/examples/hpc_script/NIBR_reinvent.sh @@ -0,0 +1,11 @@ +#!/bin/bash -l +#SBATCH -N 1 +#SBATCH -t 0-02:59:00 +#SBATCH -p core +#SBATCH --ntasks-per-node=5 +#SBATCH --mem-per-cpu=2G + +source /projects/cc/mai/miniconda3/bin/activate /projects/cc/mai/miniconda3/envs/Icolos +python /projects/cc/mai/IcolosDev/executor.py -conf /projects/cc/mai/material/Icolos/templates/NIBR/12-06-21nibr.json -debug \ + --global_variables "entrypoint_dir:/icolos, input_path_json:{entrypoint_dir}/tests/data/reinvent/small_input.json, output_path_json:{entrypoint_dir}/tests/junk/nibr_reinvent.json" + diff --git a/examples/hpc_script/SLURM_script.sh b/examples/hpc_script/SLURM_script.sh new file mode 100644 index 0000000..a7fa700 --- /dev/null +++ b/examples/hpc_script/SLURM_script.sh @@ -0,0 +1,11 @@ +#!/bin/bash -l +#SBATCH -N 1 +#SBATCH -t 0-02:59:00 +#SBATCH -p core +#SBATCH --ntasks-per-node=5 +#SBATCH --mem-per-cpu=2G + +source /projects/cc/mai/miniconda3/bin/activate /projects/cc/mai/miniconda3/envs/Icolos +python /projects/cc/mai/Icolos/executor.py -conf /projects/cc/mai/examples/Icolos/MPI_test/workflow_ReSCoSS.json \ + --global_variables "output_dir:/icolos/tests/junk" -debug + diff --git a/examples/workflow/desmond_md.json b/examples/workflow/desmond_md.json new file mode 100644 index 0000000..a9e417f --- /dev/null +++ b/examples/workflow/desmond_md.json @@ -0,0 +1,72 @@ +{ + "workflow": { + "header": { + "workflow_id": "desmond md", + "description": "Desmond simulation.", + "environment": { + "export": [ + ] + }, + "global_variables": { + "output_dir": "{entrypoint_dir}/tests/junk/desmond" + } + }, + "steps": [{ + "step_id": "desmond_md", + "type": "desmond", + "execution": { + "prefix_execution": "module load schrodinger/2021-1-js-aws" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-HOST": "localhost" + } + }, + "additional": { + "setup_msj_fields": { + + }, + "msj_fields": { + + }, + "cfg_fields": { + "time": 0.01 + } + } + }, + "input": { + "generic": [{ + "source": "{entrypoint_dir}/../IcolosData/molecules/1UYD/1UYD_apo.pdb", + "extension": "pdb" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "cms" + }, + "destination": { + "resource": "{output_dir}", + "type": "file", + "format": "txt", + "mode": "automatic" + } + },{ + "generic": { + "key": "dir" + }, + "destination": { + "resource": "{output_dir}", + "type": "file", + "format": "txt", + "mode": "dir" + } + } + ] + } + ] + } +} diff --git a/examples/workflow/docking/active_learning_docking.json b/examples/workflow/docking/active_learning_docking.json new file mode 100644 index 0000000..361fd36 --- /dev/null +++ b/examples/workflow/docking/active_learning_docking.json @@ -0,0 +1,68 @@ +{ + "workflow": { + "header": { + "workflow_id": "Active_learning_docking", + "description": "Bayesian optimisation scheme with Glide docking oracle.", + "environment": { + "export":
[ + + ] + }, + "global_variables": { + } + }, + "steps": [{ + "step_id": "active_learning", + "type": "active_learning", + "settings": { + "additional": { + "running_mode": "active_learning", + "virtual_lib": "/lib.sdf", + "validation_lib": "/val.sdf", + "activity_threshold": -7, + "n_rounds": 10, + "init_samples": 256, + "batch_size" : 128, + "criteria": "r_i_docking_score", + "oracle_config": { + "step_id": "Glide_oracle", + "type": "glide", + "execution": { + "prefix_execution": "module load schrodinger/2021-2-js-aws", + "parallelization": { + "cores": 32, + "max_length_sublists": 4 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-HOST": "cpu-only" + } + }, + "additional": { + "configuration": { + "AMIDE_MODE": "trans", + "EXPANDED_SAMPLING": "True", + "GRIDFILE": [".zip"], + "NENHANCED_SAMPLING": "1", + "POSE_OUTTYPE": "ligandlib_sd", + "POSES_PER_LIG": "1", + "POSTDOCK_NPOSE": "25", + "POSTDOCKSTRAIN": "True", + "PRECISION": "SP", + "REWARD_INTRA_HBONDS": "True" + } + } + } + } + } + } + } + ] + } +} \ No newline at end of file diff --git a/examples/workflow/docking/adv_docking.json b/examples/workflow/docking/adv_docking.json new file mode 100644 index 0000000..36a7987 --- /dev/null +++ b/examples/workflow/docking/adv_docking.json @@ -0,0 +1,120 @@ +{ + "workflow": { + "header": { + "workflow_id": "AutoDock Vina docking", + "description": "Runs docking using AutoDock Vina and a predefined receptor file.", + "environment": { + "export": [ + ] + }, + "global_variables": { + "smiles": "another_mol:Nc1ccc(cc1N)C(F)(F)F;failure:CXXC;aspirin:O=C(C)Oc1ccccc1C(=O)O", + "receptor_path": "{entrypoint_dir}/../IcolosData/AutoDockVina/1UYD_fixed.pdbqt" + } + }, + "steps": [{ + "step_id": "Ligprep", + "type": "ligprep", + "execution": { + "prefix_execution": "module load schrodinger/2020-4", + "parallelization": { + "cores": 2, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": ["-epik"], + "parameters": { + "-ph": 7.0, + "-pht": 2.0, + "-s": 10, + "-bff": 14 + } + }, + "additional": { + } + }, + "input": { + "compounds": [{ + "source": "{smiles}", + "source_type": "string" + } + ] + } + }, { + "step_id": "ADV", + "type": "vina_docking", + "execution": { + "prefix_execution": "module load AutoDock_Vina", + "parallelization": { + "cores": 4 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + } + }, + "additional": { + "configuration": { + "seed": 42, + "receptor_path": "{receptor_path}", + "number_poses": 2, + "search_space": { + "--center_x": 3.3, + "--center_y": 11.5, + "--center_z": 24.8, + "--size_x": 15, + "--size_y": 10, + "--size_z": 10 + } + }, + "grid_ids": ["1UYD"] + } + }, + "input": { + "compounds": [{ + "source": "Ligprep", + "source_type": "step" + } + ] + }, + "writeout": [ + { + "compounds": { + "category": "conformers" + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/adv_docked_conformers.sdf", + "type": "file", + "format": "SDF" + } + }, + { + "compounds": { + "category": "conformers", + "selected_tags": ["docking_score", "grid_id"], + "aggregation": { + "mode": "best_per_compound", + "key": "docking_score" + } + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/adv_docked_conformers.csv", + "type": "file", + "format": "CSV" + } + } + ] + } + ] + } +} \ No newline at end of file diff --git 
a/examples/workflow/docking/adv_target_preparation.json b/examples/workflow/docking/adv_target_preparation.json new file mode 100644 index 0000000..b66d8ab --- /dev/null +++ b/examples/workflow/docking/adv_target_preparation.json @@ -0,0 +1,44 @@ +{ + "workflow": { + "header": { + "workflow_id": "AutoDock Vina target preparation", + "description": "Runs target preparation for AutoDock Vina and generates a PDBQT receptor file.", + "environment": { + "export": [{ + "key": "OE_LICENSE", + "value": "/opt/scp/software/oelicense/1.0/oe_license.seq1" + } + + ] + }, + "global_variables": { + "receptor_input_apo_path": "{entrypoint_dir}/../IcolosData/molecules/1UYD/1UYD_apo.pdb", + "reference_ligand_path": "{entrypoint_dir}/../IcolosData/molecules/1UYD/PU8_native_ligand.pdb", + "receptor_output_path": "{entrypoint_dir}/tests/junk/1UYD_fixed.pdbqt" + } + }, + "steps": [{ + "step_id": "ADV_target_preparation", + "type": "vina_target_preparation", + "execution": { + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + } + }, + "additional": { + "pH": 7.4, + "input_receptor_pdb": "{receptor_input_apo_path}", + "output_receptor_pdbqt": "{receptor_output_path}", + "extract_box": { + "reference_ligand_path": "{reference_ligand_path}", + "reference_ligand_format": "PDB" + } + } + } + } + ] + } +} diff --git a/examples/workflow/docking/glide_docking.json b/examples/workflow/docking/glide_docking.json new file mode 100644 index 0000000..228db6c --- /dev/null +++ b/examples/workflow/docking/glide_docking.json @@ -0,0 +1,122 @@ +{ + "workflow": { + "header": { + "workflow_id": "Docking with Glide", + "description": "Docking a few compounds with Glide after Ligprep embedding", + "environment": { + "export": [ + ] + }, + "global_variables": { + "smiles": "another_mol:Nc1ccc(cc1N)C(F)(F)F;failure:CXXC;aspirin:O=C(C)Oc1ccccc1C(=O)O" + } + }, + "steps": [{ + "step_id": "Ligprep", + "type": "ligprep", + "execution": { + "prefix_execution": "module load schrodinger/2020-4", + "parallelization": { + "cores": 2, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": ["-epik"], + "parameters": { + "-ph": 7.0, + "-pht": 2.0, + "-s": 10, + "-bff": 14 + } + }, + "additional": { + } + }, + "input": { + "compounds": [{ + "source": "{smiles}", + "source_type": "string" + } + ] + } + }, { + "step_id": "Glide", + "type": "glide", + "execution": { + "prefix_execution": "module load schrodinger/2021-1-js-aws", + "parallelization": { + "cores": 4, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-HOST": "cpu-only" + } + }, + "additional": { + "configuration": { + "AMIDE_MODE": "trans", + "EXPANDED_SAMPLING": "True", + "GRIDFILE": ["{entrypoint_dir}/../IcolosData/Glide/1UYD_grid_constraints.zip"], + "NENHANCED_SAMPLING": "1", + "POSE_OUTTYPE": "ligandlib_sd", + "POSES_PER_LIG": "3", + "POSTDOCK_NPOSE": "25", + "POSTDOCKSTRAIN": "True", + "PRECISION": "SP", + "REWARD_INTRA_HBONDS": "True" + }, + "maestro_in_file": { + "path": "{entrypoint_dir}/../IcolosData/Glide/example.in" + } + } + }, + "input": { + "compounds": [{ + "source": "Ligprep", + "source_type": "step" + } + ] + }, + "writeout": [ + { + "compounds": { + "category": "conformers" + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/docked_conformers.sdf", + "type": "file", + "format": "SDF" + } + }, + { + "compounds": { + "category": "conformers", + "selected_tags": 
["docking_score", "grid_id"], + "aggregation": { + "mode": "best_per_compound", + "key": "docking_score" + } + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/docked_conformers.csv", + "type": "file", + "format": "CSV" + } + } + ] + } + ] + } +} \ No newline at end of file diff --git a/examples/workflow/fep_plus/fep_plus_aws.json b/examples/workflow/fep_plus/fep_plus_aws.json new file mode 100644 index 0000000..6e08345 --- /dev/null +++ b/examples/workflow/fep_plus/fep_plus_aws.json @@ -0,0 +1,171 @@ +{ + "workflow": { + "header": { + "workflow_id": "Docking/FEP+ combined workflow", + "description": "Test setup for FEP+ integration being run in the cloud (AWS).", + "environment": { + "export": [ + ] + }, + "global_variables": { + "smiles": "4-[4-(4-chlorophenyl)-2-methyl-1,3-thiazol-5-yl]benzenesulfonamide:Cc1nc(-c2ccc(Cl)cc2)c(-c2ccc(S(N)(=O)=O)cc2)s1;N-methyl-N-(4-methylphenyl)-4-(4-methylsulfonylphenyl)-6-(trifluoromethyl)pyrimidin-2-amine:Cc1ccc(N(C)c2nc(-c3ccc(S(C)(=O)=O)cc3)cc(C(F)(F)F)n2)cc1" + } + }, + "steps": [{ + "step_id": "initialization_smile", + "type": "initialization", + "input": { + "compounds": [{ + "source": "{smiles}", + "source_type": "string" + } + ] + } + }, { + "step_id": "Ligprep", + "type": "ligprep", + "execution": { + "prefix_execution": "module load schrodinger/2020-4", + "parallelization": { + "cores": 2, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": ["-epik"], + "parameters": { + "-ph": 7.0, + "-pht": 2.0, + "-s": 10, + "-bff": 14, + "-HOST": "localhost" + } + }, + "additional": { + "filter_file": { + "Total_charge": "!= 0" + } + } + }, + "input": { + "compounds": [{ + "source": "initialization_smile", + "source_type": "step" + } + ] + } + }, + { + "step_id": "Glide", + "type": "glide", + "execution": { + "prefix_execution": "module load schrodinger/2021-1-js-aws", + "parallelization": { + "cores": 4, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-HOST": "cpu-only" + } + }, + "additional": { + "configuration": { + "AMIDE_MODE": "trans", + "EXPANDED_SAMPLING": "True", + "GRIDFILE": ["{entrypoint_dir}/../IcolosData_junk/molecules/1CX2/1cx2_GridGen.zip"], + "NENHANCED_SAMPLING": "1", + "POSE_OUTTYPE": "poseviewer", + "POSES_PER_LIG": "1", + "POSTDOCK_NPOSE": "25", + "POSTDOCKSTRAIN": "True", + "PRECISION": "SP", + "REWARD_INTRA_HBONDS": "True" + } + } + }, + "input": { + "compounds": [{ + "source": "Ligprep", + "source_type": "step" + } + ] + } + },{ + "step_id": "FEP_plus_setup", + "type": "fep_plus_setup", + "execution": { + "prefix_execution": "module load schrodinger/2021-2-js-aws" + }, + "settings": { + + }, + "input": { + "compounds": [{ + "source": "Glide", + "source_type": "step", + "target_field": "compounds" + + }] + } + },{ + "step_id": "FEP_plus_exec", + "type": "fep_plus_exec", + "execution": { + "prefix_execution": "module load schrodinger/2021-2-js-aws && $SCHRODINGER/jsc download-start" + }, + "token_guard": { + "prefix_execution": "module load schrodinger/2021-2-js-aws", + "binary_location": "ssh /opt/schrodinger/suite/installations/default", + "token_pools": { + "FEP_GPGPU": 16 + }, + "wait_interval_seconds": 30, + "wait_limit_seconds": 0 + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-JOBNAME": "test", + "-HOST": "fep-compute" + } + } + }, + "input": { + "compounds": [{ + "source": "Glide", + "source_type": "step", + 
"target_field": "compounds" + }], + "generic": [{ + "source": "FEP_plus_setup", + "extension": "fmp" + }] + }, + "writeout": [{ + "compounds": { + "category": "conformers", + "selected_tags": ["dG", "docking_score"] + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/fe_plus_resultsv", + "type": "file", + "format": "CSV" + } + }] + } + + ] + } +} diff --git a/examples/workflow/gromacs/gromacs_ensemble_mmgbsa.json b/examples/workflow/gromacs/gromacs_ensemble_mmgbsa.json new file mode 100644 index 0000000..7b04bf8 --- /dev/null +++ b/examples/workflow/gromacs/gromacs_ensemble_mmgbsa.json @@ -0,0 +1,619 @@ +{ + "workflow": { + "header": { + "workflow_id": "gromacs_ensemble_mmgbsa", + "description": "ensemble MMGBSA demonstration - step iteration + SLURM job control", + "environment": { + "export": [ + { + "key": "GMX_GPU_DD_COMMS", + "value": "true" + }, + { + "key": "GMX_GPU_PME_PP_COMMS", + "value": "true" + }, + { + "key": "GMX_FORCE_UPDATE_DEFAULT_GPU", + "value": "true" + }, + { + "key": "ACPYPE", + "value": "${ACPYPE}/acpype" + } + ] + }, + "global_variables": { + "file_base": "{entrypoint_dir}/../IcolosData/gromacs/protein", + "output_dir": "{entrypoint_dir}/tests/junk/gromacs" + } + }, + "steps": [ + { + "step_id": "01_pdb2gmx", + "type": "pdb2gmx", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [ + "-ignh" + ], + "parameters": { + "-water": "tip3p", + "-ff": "amber03" + } + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "{file_base}/1BVG.pdb", + "extension": "pdb" + } + ] + } + }, + { + "step_id": "02_editconf", + "type": "editconf", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [ + "-c" + ], + "parameters": { + "-d": "1.2", + "-bt": "dodecahedron" + } + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "01_pdb2gmx", + "extension": "gro" + } + ] + } + }, + { + "step_id": "03_solvate", + "type": "solvate", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-cs": "spc216.gro" + } + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "02_editconf", + "extension": "gro" + }, + { + "source": "01_pdb2gmx", + "extension": "top" + } + ] + } + }, + { + "step_id": "04_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "03_solvate", + "extension": "gro" + }, + { + "source": "{file_base}/ions.mdp", + "extension": "mdp" + }, + { + "source": "03_solvate", + "extension": "top" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "05_genion", + "type": "genion", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [ + "-neutral" + ], + "parameters": { + "-pname": "NA", + "-nname": "CL" + } + }, + "additional": { + "pipe_input": "SOL" + } + }, + "input": { + "generic": [ + { + "source": "04_grompp", + "extension": "tpr" + }, + { + "source": "04_grompp", + "extension": "top" + }, + { + "source": 
"04_grompp", + "extension": "itp" + } + ] + } + }, + { + "step_id": "gromacs_iterator", + "type": "iterator", + "base_config": [ + { + "step_id": "06_grompp_eminim", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "05_genion", + "extension": "gro" + }, + { + "source": "{file_base}/minim.mdp", + "extension": "mdp" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "05_genion", + "extension": "itp" + } + ] + } + }, + { + "step_id": "07_eminim_mdrun", + "type": "mdrun", + "execution": { + "resource": "slurm", + "resources": { + "partition": "gpu", + "gres": "gpu:1", + "modules": [ + "GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + ] + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "06_grompp_eminim", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "08_nvt_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "make_ndx_command": "auto", + "-r": true + } + }, + "input": { + "generic": [ + { + "source": "07_eminim_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/nvt_equil.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "09_nvt_mdrun", + "type": "mdrun", + "execution": { + "resource": "slurm", + "resources": { + "partition": "gpu", + "gres": "gpu:1", + "modules": [ + "GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + ] + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "08_nvt_grompp", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "10_npt_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "make_ndx_command": "auto", + "-r": true + } + }, + "input": { + "generic": [ + { + "source": "09_nvt_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/npt_equil.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + }, + { + "source": "08_nvt_grompp", + "extension": "ndx" + } + ] + } + }, + { + "step_id": "11_npt_mdrun", + "type": "mdrun", + "execution": { + "resource": "slurm", + "resources": { + "partition": "gpu", + "gres": "gpu:1", + "modules": [ + "GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + ] + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "10_npt_grompp", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "12_prod_md_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + 
"fields": { + "nsteps": "4000000" + }, + "make_ndx_command": "auto", + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "11_npt_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/md.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "13_prod_mdrun", + "type": "mdrun", + "execution": { + "resource": "slurm", + "resources": { + "partition": "gpu", + "gres": "gpu:1", + "modules": [ + "GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + ] + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-nb": "gpu", + "-bonded": "gpu", + "-pme": "gpu" + } + } + }, + "input": { + "generic": [ + { + "source": "12_prod_md_grompp", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "14_trjconv", + "type": "trjconv", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [ + "-center" + ], + "parameters": { + "-pbc": "mol", + "-n": "index.ndx" + } + }, + "additional": { + "pipe_input": "Protein_Other System" + } + }, + "input": { + "generic": [ + { + "source": "13_prod_mdrun", + "extension": "xtc" + }, + { + "source": "12_prod_md_grompp", + "extension": "tpr" + }, + { + "source": "12_prod_md_grompp", + "extension": "ndx" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "xtc" + }, + "destination": { + "resource": "{output_dir}/md_0_1_trjconv.xtc", + "type": "file", + "format": "TXT" + } + } + ] + }, + { + "step_id": "14b_trjconv", + "type": "trjconv", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-fit": "rot+trans", + "-n": "index.ndx" + } + }, + "additional": { + "pipe_input": "Protein_Other System" + } + }, + "input": { + "generic": [ + { + "source": "14_trjconv", + "extension": "xtc" + }, + { + "source": "12_prod_md_grompp", + "extension": "tpr" + }, + { + "source": "12_prod_md_grompp", + "extension": "ndx" + } + ] + } + }, + { + "step_id": "15_gmx_MMPBSA", + "type": "gmx_mmpbsa", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2 && module load gmx_MMPBSA/1.3.3-fosscuda-2019a-Python-3.7.2" + }, + "settings": { + "arguments": { + "parameters": {} + }, + "additional": { + "coupling groups": "Protein Other", + "forcefield": "/amber14sb.ff" + } + }, + "input": { + "generic": [ + { + "source": "14b_trjconv", + "extension": "xtc" + }, + { + "source": "13_prod_mdrun", + "extension": "tpr" + }, + { + "source": "13_prod_mdrun", + "extension": "gro" + }, + { + "source": "12_prod_md_grompp", + "extension": "top" + }, + { + "source": "12_prod_md_grompp", + "extension": "itp" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "dat" + }, + "destination": { + "resource": "{output_dir}/FINAL_RESULTS_MMPBSA.dat", + "type": "file", + "format": "TXT" + } + } + ] + } + ], + "iter_settings": { + "iter_mode": "n_iters", + "n_iters": 25, + "parallelizer_settings": { + "parallelize": true, + "dependent_steps": 11, + "cores": 14 + } + } + } + ] + } +} \ No newline at end of file diff --git a/examples/workflow/gromacs/gromacs_fpocket.json b/examples/workflow/gromacs/gromacs_fpocket.json new file mode 100644 index 0000000..9642647 --- /dev/null +++ b/examples/workflow/gromacs/gromacs_fpocket.json @@ -0,0 +1,656 @@ +{ + "workflow": { + "header": { + 
"workflow_id": "gromacs_md_fpocket", + "description": "full md run with gromacs, with MDpocket run to extract descriptors for binding pockets", + "environment": { + "export": [ + { + "key": "GMX_GPU_DD_COMMS", + "value": "true" + }, + { + "key": "GMX_GPU_PME_PP_COMMS", + "value": "true" + }, + { + "key": "GMX_FORCE_UPDATE_DEFAULT_GPU", + "value": "true" + }, + { + "key": "ACPYPE", + "value": "${ACPYPE}/acpype" + } + ] + }, + "global_variables": { + "file_base": "{entrypoint_dir}/../IcolosData/gromacs/protein", + "output_dir": "{entrypoint_dir}/tests/junk/gromacs" + } + }, + "steps": [ + { + "step_id": "01_pdb2gmx", + "type": "pdb2gmx", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [ + "-ignh" + ], + "parameters": { + "-water": "tip3p", + "-ff": "amber03" + } + }, + "additional": { + "removeres": [ + "DMP" + ] + } + }, + "input": { + "generic": [ + { + "source": "{file_base}/1BVG.pdb", + "extension": "pdb" + } + ] + } + }, + { + "step_id": "02_editconf", + "type": "editconf", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [ + "-c" + ], + "parameters": { + "-d": "1.5", + "-bt": "dodecahedron" + } + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "01_pdb2gmx", + "extension": "gro" + } + ] + } + }, + { + "step_id": "03_solvate", + "type": "solvate", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-cs": "spc216.gro" + } + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "02_editconf", + "extension": "gro" + }, + { + "source": "01_pdb2gmx", + "extension": "top" + } + ] + } + }, + { + "step_id": "04_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "03_solvate", + "extension": "gro" + }, + { + "source": "{file_base}/ions.mdp", + "extension": "mdp" + }, + { + "source": "03_solvate", + "extension": "top" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "05_genion", + "type": "genion", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [ + "-neutral" + ], + "parameters": { + "-pname": "NA", + "-nname": "CL" + } + }, + "additional": { + "pipe_input": "SOL" + } + }, + "input": { + "generic": [ + { + "source": "04_grompp", + "extension": "tpr" + }, + { + "source": "04_grompp", + "extension": "top" + } + ] + } + }, + { + "step_id": "06_grompp_eminim", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "05_genion", + "extension": "gro" + }, + { + "source": "{file_base}/minim.mdp", + "extension": "mdp" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "07_eminim_mdrun", + "type": "mdrun", + "execution": 
{ + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + } + }, + "input": { + "generic": [ + { + "source": "06_grompp_eminim", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "08_nvt_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "-r": true, + "make_ndx_command": "auto" + } + }, + "input": { + "generic": [ + { + "source": "07_eminim_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/nvt_equil.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "09_nvt_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "08_nvt_grompp", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "10_npt_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "-r": true, + "make_ndx_command": "auto" + } + }, + "input": { + "generic": [ + { + "source": "09_nvt_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/npt_equil.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + }, + { + "source": "08_nvt_grompp", + "extension": "ndx" + } + ] + } + }, + { + "step_id": "11_npt_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "10_npt_grompp", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "12_prod_md_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "-r": false, + "fields": { + "nsteps": "500" + }, + "make_ndx_command": "auto" + } + }, + "input": { + "generic": [ + { + "source": "11_npt_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/md.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + }, + { + "source": "08_nvt_grompp", + "extension": "ndx" + } + ] + } + }, + { + "step_id": "13_prod_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-nb": "gpu", + "-bonded": "gpu", + "-pme": "gpu", + "-c": "structure.pdb" + } + } + }, + "input": { + "generic": [ + { + "source": "12_prod_md_grompp", + "extension": "tpr" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "xtc" + }, + "destination": { + "resource": "{output_dir}/md_0_1.xtc", + "type": 
"file", + "format": "txt" + } + }, + { + "generic": { + "key": "log" + }, + "destination": { + "resource": "{output_dir}/md_0_1.log", + "type": "file", + "format": "txt" + } + }, + { + "generic": { + "key": "pdb" + }, + "destination": { + "resource": "{output_dir}/md_0_1.pdb", + "type": "file", + "format": "txt" + } + }, + { + "generic": { + "key": "tpr" + }, + "destination": { + "resource": "{output_dir}/md_0_1.tpr", + "type": "file", + "format": "txt" + } + } + ] + }, + { + "step_id": "14_trjconv", + "type": "trjconv", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [ + "-center" + ], + "parameters": { + "-pbc": "nojump" + } + }, + "additional": { + "pipe_input": "Protein Protein" + } + }, + "input": { + "generic": [ + { + "source": "13_prod_mdrun", + "extension": "xtc" + }, + { + "source": "13_prod_mdrun", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "15_trjconv", + "type": "trjconv", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-fit": "rot+trans" + } + }, + "additional": { + "pipe_input": "Protein Protein" + } + }, + "input": { + "generic": [ + { + "source": "14_trjconv", + "extension": "xtc" + }, + { + "source": "14_trjconv", + "extension": "tpr" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "xtc" + }, + "destination": { + "resource": "{output_dir}/md_0_1_trjconv.xtc", + "type": "file", + "format": "txt" + } + } + ] + }, + { + "step_id": "16_editconf", + "type": "editconf", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx", + "-o": "structure.pdb" + } + }, + "additional": { + "pipe_input": "Protein" + } + }, + "input": { + "generic": [ + { + "source": "12_prod_md_grompp", + "extension": "gro" + }, + { + "source": "12_prod_md_grompp", + "extension": "ndx" + } + ] + } + }, + { + "step_id": "17_MDpocket", + "type": "mdpocket", + "execution": { + "prefix_execution": "module load fpocket" + + }, + "settings": { + "arguments": { + "parameters": {} + }, + "additional": { + "format": "gromacs" + } + }, + "input": { + "generic": [ + { + "source": "15_trjconv", + "extension": "xtc" + }, + { + "source": "16_editconf", + "extension": "pdb" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "pdb" + }, + "destination": { + "resource": "{output_dir}", + "type": "file", + "format": "txt", + "mode": "dir" + } + }, + { + "generic": { + "key": "txt" + }, + "destination": { + "resource": "{output_dir}", + "type": "file", + "format": "txt", + "mode": "dir" + } + }, + { + "generic": { + "key": "dx" + }, + "destination": { + "resource": "{output_dir}", + "type": "file", + "format": "txt", + "mode": "dir" + } + } + ] + } + ] + } +} \ No newline at end of file diff --git a/examples/workflow/gromacs/gromacs_md.json b/examples/workflow/gromacs/gromacs_md.json new file mode 100644 index 0000000..17a2d40 --- /dev/null +++ b/examples/workflow/gromacs/gromacs_md.json @@ -0,0 +1,488 @@ +{ + "workflow": { + "header": { + "workflow_id": "gromacs_test", + "description": "full md run with gromacs", + "environment": { + "export": [{ + "key": "GMX_GPU_DD_COMMS", + "value": "true" + }, { + "key": "GMX_GPU_PME_PP_COMMS", + "value": "true" + }, { + "key": "GMX_FORCE_UPDATE_DEFAULT_GPU", + "value": "true" + }, { + 
"key": "ACPYPE", + "value": "${ACPYPE}/acpype" + } + ] + }, + "global_variables": { + "file_base": "{entrypoint_dir}/../IcolosData/gromacs/protein", + "output_dir": "{entrypoint_dir}/tests/junk/gromacs" + } + }, + "steps": [{ + "step_id": "01_pdb2gmx", + "type": "pdb2gmx", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + + }, + "settings": { + "arguments": { + "flags": ["-ignh"], + "parameters": { + "-water": "tip3p", + "-ff": "amber03" + } + }, + "additional": { + } + }, + "input": { + "generic": [ + { + "source": "{file_base}/1BVG.pdb", + "extension": "pdb" + } + ]} + },{ + "step_id": "02_editconf", + "type": "editconf", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": ["-c"], + "parameters": { + "-d": "1.0", + "-bt": "dodecahedron" + } + + }, + "additional": { + } + }, + "input": { + "generic": [ + { + "source": "01_pdb2gmx", + "extension": "gro" + } + + ]} + + },{ + "step_id": "03_solvate", + "type": "solvate", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-cs": "spc216" + } + }, + "additional": { + } + }, + "input": { + "generic": [ + { + "source": "02_editconf", + "extension": "gro" + },{ + "source": "01_pdb2gmx", + "extension": "top" + } + + ]} + + }, + { + "step_id": "04_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + } + }, + "additional": { + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "03_solvate", + "extension": "gro" + },{ + "source": "{file_base}/ions.mdp", + "extension": "mdp" + },{ + "source": "03_solvate", + "extension": "top" + },{ + "source": "01_pdb2gmx", + "extension": "itp" + } + ]} + + },{ + "step_id": "05_genion", + "type": "genion", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": ["-neutral"], + "parameters": { + "-pname": "NA", + "-nname": "CL" + } + }, + "additional": { + "pipe_input": "SOL" + } + }, + "input": { + "generic": [ + { + "source": "04_grompp", + "extension": "tpr" + },{ + "source": "04_grompp", + "extension": "top" + },{ + "source": "04_grompp", + "extension": "itp" + } + ]} + + },{ + "step_id": "06_grompp_eminim", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + + } + }, + "additional": { + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "05_genion", + "extension": "gro" + },{ + "source": "{file_base}/minim.mdp", + "extension": "mdp" + },{ + "source": "05_genion", + "extension": "top" + },{ + "source": "05_genion", + "extension": "itp" + } + + ]} + },{ + "step_id": "07_eminim_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + + } + }, + "additional": { + } + }, + "input": { + "generic": [ + { + "source": "06_grompp_eminim", + "extension": "tpr" + } + ]} + },{ + "step_id": "08_nvt_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + 
"make_ndx_command": "auto", + "-r": true + } + }, + "input": { + "generic": [ + { + "source": "07_eminim_mdrun", + "extension": "gro" + },{ + "source": "05_genion", + "extension": "top" + },{ + "source": "{file_base}/nvt_equil.mdp", + "extension": "mdp" + },{ + "source": "01_pdb2gmx", + "extension": "itp" + } + ]} + },{ + "step_id": "09_nvt_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + } + }, + "additional": { + } + }, + "input": { + "generic": [ + { + "source": "08_nvt_grompp", + "extension": "tpr" + } + ]} + },{ + "step_id": "10_npt_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "make_ndx_command": "auto", + "-r": true + } + }, + "input": { + "generic": [ + { + "source": "09_nvt_mdrun", + "extension": "gro" + },{ + "source": "05_genion", + "extension": "top" + },{ + "source": "{file_base}/npt_equil.mdp", + "extension": "mdp" + },{ + "source": "01_pdb2gmx", + "extension": "itp" + } + ]} + },{ + "step_id": "11_npt_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + } + }, + "additional": { + "make_ndx_command": "auto" + } + }, + "input": { + "generic": [ + { + "source": "10_npt_grompp", + "extension": "tpr" + } + ]} + + },{ + "step_id": "12_prod_md_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "fields": { + "nsteps": "50000" + }, + "make_ndx_command": "auto", + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "11_npt_mdrun", + "extension": "gro" + },{ + "source": "05_genion", + "extension": "top" + },{ + "source": "{file_base}/md.mdp", + "extension": "mdp" + },{ + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + },{ + "step_id": "13_prod_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-nb": "gpu", + "-bonded": "gpu" + } + } + }, + "input": { + "generic": [ + { + "source": "12_prod_md_grompp", + "extension": "tpr" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "xtc" + }, + "destination": { + "resource": "{output_dir}/md_0_1.xtc", + "type": "file", + "format": "TXT" + } + }, + { + "generic": { + "key": "log" + }, + "destination": { + "resource": "{output_dir}/md_0_1.log", + "type": "file", + "format": "TXT" + } + }, + { + "generic": { + "key": "gro" + }, + "destination": { + "resource": "{output_dir}/md_0_1.gro", + "type": "file", + "format": "TXT" + } + }, + { + "generic": { + "key": "tpr" + }, + "destination": { + "resource": "{output_dir}/md_0_1.tpr", + "type": "file", + "format": "TXT" + } + } + ] + },{ + "step_id": "14_trjconv", + "type": "trjconv", + "execution":{ + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings":{ + "arguments":{ + "flags":["-center"] + }, + "additional":{ + "pipe_input": "Protein System" + } + }, + "input":{ + "generic":[ + { + "source": "13_prod_mdrun", + "extension": "xtc" + }, + { + "source": "13_prod_mdrun", + "extension": "tpr" + } + 
] + }, + "writeout":[ + { + "generic": { + "key": "xtc" + }, + "destination": { + "resource": "{output_dir}/md_0_1_trjconv.xtc", + "type": "file", + "format": "TXT" + } + } + ] + } + ] + } +} \ No newline at end of file diff --git a/examples/workflow/gromacs/gromacs_mmgbsa.json b/examples/workflow/gromacs/gromacs_mmgbsa.json new file mode 100644 index 0000000..0ed2725 --- /dev/null +++ b/examples/workflow/gromacs/gromacs_mmgbsa.json @@ -0,0 +1,580 @@ +{ + "workflow": { + "header": { + "workflow_id": "gromacs_test", + "description": "full md run with gromacs with ligand parametrisation and mmgbsa calculation", + "environment": { + "export": [ + { + "key": "GMX_GPU_DD_COMMS", + "value": "true" + }, + { + "key": "GMX_GPU_PME_PP_COMMS", + "value": "true" + }, + { + "key": "GMX_FORCE_UPDATE_DEFAULT_GPU", + "value": "true" + }, + { + "key": "ACPYPE", + "value": "${ACPYPE}/acpype" + } + + + ] + }, + "global_variables": { + "file_base": "{entrypoint_dir}/../IcolosData/gromacs/protein", + "output_dir": "{entrypoint_dir}/tests/junk/gromacs" + + } + }, + "steps": [ + { + "step_id": "01_pdb2gmx", + "type": "pdb2gmx", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [ + "-ignh" + ], + "parameters": { + "-water": "tip3p", + "-ff": "amber14sb" + } + }, + "additional": { + } + }, + "input": { + "generic": [ + { + "source": "{file_base}/1BVG.pdb", + "extension": "pdb" + } + ] + } + }, + { + "step_id": "02_editconf", + "type": "editconf", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [ + "-c" + ], + "parameters": { + "-d": "1.0", + "-bt": "dodecahedron" + } + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "01_pdb2gmx", + "extension": "gro" + } + ] + } + }, + { + "step_id": "03_solvate", + "type": "solvate", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-cs": "spc216.gro" + } + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "02_editconf", + "extension": "gro" + }, + { + "source": "01_pdb2gmx", + "extension": "top" + } + ] + } + }, + { + "step_id": "04_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "03_solvate", + "extension": "gro" + }, + { + "source": "{file_base}/ions.mdp", + "extension": "mdp" + }, + { + "source": "03_solvate", + "extension": "top" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "05_genion", + "type": "genion", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [ + "-neutral" + ], + "parameters": { + "-pname": "NA", + "-nname": "CL" + } + }, + "additional": { + "pipe_input": "SOL" + } + }, + "input": { + "generic": [ + { + "source": "04_grompp", + "extension": "tpr" + }, + { + "source": "04_grompp", + "extension": "top" + }, + { + "source": "04_grompp", + "extension": "itp" + } + ] + } + }, + { + "step_id": "06_grompp_eminim", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + 
"-r": false + } + }, + "input": { + "generic": [ + { + "source": "05_genion", + "extension": "gro" + }, + { + "source": "{file_base}/minim.mdp", + "extension": "mdp" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "05_genion", + "extension": "itp" + } + ] + } + }, + { + "step_id": "07_eminim_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "06_grompp_eminim", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "08_nvt_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "make_ndx_command": "auto", + "-r": true + } + }, + "input": { + "generic": [ + { + "source": "07_eminim_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/nvt_equil.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "09_nvt_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "08_nvt_grompp", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "10_npt_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "make_ndx_command": "auto", + "-r": true + } + }, + "input": { + "generic": [ + { + "source": "09_nvt_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/npt_equil.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + }, + { + "source": "08_nvt_grompp", + "extension": "ndx" + } + ] + } + }, + { + "step_id": "11_npt_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "generic": [ + { + "source": "10_npt_grompp", + "extension": "tpr" + } + ] + } + }, + { + "step_id": "12_prod_md_grompp", + "type": "grompp", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-n": "index.ndx" + } + }, + "additional": { + "fields": { + "nsteps": "50000" + }, + "make_ndx_command": "auto", + "-r": false + } + }, + "input": { + "generic": [ + { + "source": "11_npt_mdrun", + "extension": "gro" + }, + { + "source": "05_genion", + "extension": "top" + }, + { + "source": "{file_base}/md.mdp", + "extension": "mdp" + }, + { + "source": "01_pdb2gmx", + "extension": "itp" + } + ] + } + }, + { + "step_id": "13_prod_mdrun", + "type": "mdrun", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-nb": "gpu", + "-bonded": "gpu", + "-pme": "gpu" + } + } + }, + "input": { + "generic": [ + { + "source": "12_prod_md_grompp", + "extension": "tpr" + } + ] + }, + "writeout": [ + { 
+ "generic": { + "key": "xtc" + }, + "destination": { + "resource": "{output_dir}/md_0_1.xtc", + "type": "file", + "format": "TXT" + } + }, + { + "generic": { + "key": "log" + }, + "destination": { + "resource": "{output_dir}/md_0_1.log", + "type": "file", + "format": "TXT" + } + }, + { + "generic": { + "key": "gro" + }, + "destination": { + "resource": "{output_dir}/md_0_1.gro", + "type": "file", + "format": "TXT" + } + }, + { + "generic": { + "key": "tpr" + }, + "destination": { + "resource": "{output_dir}/md_0_1.tpr", + "type": "file", + "format": "TXT" + } + } + ] + }, + { + "step_id": "14_trjconv", + "type": "trjconv", + "execution": { + "prefix_execution": "module load GROMACS/2020.3-fosscuda-2019a" + }, + "settings": { + "arguments": { + "flags": [ + "-center" + ] + }, + "additional": { + "pipe_input": "System System" + } + }, + "input": { + "generic": [ + { + "source": "13_prod_mdrun", + "extension": "xtc" + }, + { + "source": "13_prod_mdrun", + "extension": "tpr" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "xtc" + }, + "destination": { + "resource": "{output_dir}/md_0_1_trjconv.xtc", + "type": "file", + "format": "TXT" + } + } + ] + }, + { + "step_id": "15_gmx_MMPBSA", + "type": "gmx_mmpbsa", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2 && module load gmx_MMPBSA/1.3.3-fosscuda-2019a-Python-3.7.2" + }, + "settings": { + "arguments": { + "parameters": { + } + }, + "additional": { + "coupling_groups": "Protein Other", + "forcefield": "/amber14sb.ff" + } + }, + "input": { + "generic": [ + { + "source": "14_trjconv", + "extension": "xtc" + }, + { + "source": "13_prod_mdrun", + "extension": "tpr" + }, + { + "source": "13_prod_mdrun", + "extension": "gro" + }, + { + "source": "12_prod_md_grompp", + "extension": "top" + }, + { + "source": "12_prod_md_grompp", + "extension": "itp" + } + ] + }, + "writeout": [ + { + "generic": { + "key": "dat" + }, + "destination": { + "resource": "{output_dir}/FINAL_RESULTS_MMPBSA.pdb", + "type": "file", + "format": "TXT" + } + } + ] + } + ] + } +} \ No newline at end of file diff --git a/examples/workflow/input_file_types.json b/examples/workflow/input_file_types.json new file mode 100644 index 0000000..476e7e9 --- /dev/null +++ b/examples/workflow/input_file_types.json @@ -0,0 +1,91 @@ +{ + "workflow": { + "header": { + "id": "input_file_types_example", + "description": "This configuration illustrates the use of different input file types.", + "logging": { + "logfile": "tests/junk/input_file_types.log" + }, + "environment": { + "export": [{ + "key": "XTBHOME", + "value": "/opt/scp/services/reinvent/Icolos/binaries/xtb-6.3.2" + }, { + "key": "XTBPATH", + "value": "${XTBHOME}/share/xtb" + }, { + "key": "PATH", + "value": "${PATH}:${XTBHOME}/bin" + }, { + "key": "PKG_CONFIG_PATH", + "value": "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig" + } + ] + } + }, + "steps": [{ + "step_id": "01_initialization_from_SDF", + "type": "initialization", + "input": { + "compounds": [ + { + "source": "{entrypoint_dir}/../IcolosData/molecules/aspirin.sdf", + "source_type": "file", + "format": "SDF" + } + ] + } + }, + { + "step_id": "01_initialization_SMI", + "type": "initialization", + "input": { + "compounds": [ + { + "source": "{entrypoint_dir}/../IcolosData/molecules/paracetamol.smi", + "source_type": "file", + "format": "SMI" + } + ] + } + }, + { + "step_id": "01_initialization_CSV", + "type": "initialization", + "input": { + "compounds": [ + { + "source": 
"{entrypoint_dir}/../IcolosData/molecules/small_molecules.csv", + "columns": { + "smiles": "SMILES", + "names": "name" + }, + "source_type": "file", + "format": "CSV" + } + ] + } + }, + { + "step_id": "02_embed_SMI", + "type": "embedding", + "settings": { + "arguments": { + "flags": [], + "parameters": { + "method": "rdkit" + } + } + }, + "input": { + "compounds": [ + { + "source": "01_initialization_SMI", + "source_type": "step" + } + ] + } + } + ] + } +} diff --git a/examples/workflow/pmx/pmx_rbfe.json b/examples/workflow/pmx/pmx_rbfe.json new file mode 100644 index 0000000..cbfad71 --- /dev/null +++ b/examples/workflow/pmx/pmx_rbfe.json @@ -0,0 +1,376 @@ +{ + "workflow": { + "header": { + "workflow_id": "Full PMX workflow - rbfe", + "description": "PMX full map calculation with parallel GPU sims.", + "environment": { + "export": [ + { + "key": "GMX_GPU_DD_COMMS", + "value": "true" + }, + { + "key": "GMX_GPU_PME_PP_COMMS", + "value": "true" + }, + { + "key": "GMX_FORCE_UPDATE_DEFAULT_GPU", + "value": "true" + }, + { + "key": "ACPYPE", + "value": "${ACPYPE}/acpype" + }, + { + "key": "PMX_PYTHON", + "value": "${CONDA}/envs/pmx/bin/python" + }, + { + "key": "PMX", + "value": "${CONDA}/envs/pmx/bin/pmx" + }, + { + "key": "GMXLIB", + "value": "${PMX}/src/pmx/data/mutff" + } + ] + }, + "global_variables": { + "file_path": "{entrypoint_dir}/../IcolosData/pmx", + "output_dir": "{work_dir}/tests/junk/pmx" + }, + "global_settings": { + "single_directory": true, + "remove_temporary_files": false + } + }, + "steps": [ + { + "step_id": "fep_setup", + "type": "fep_plus_setup", + "execution": { + "prefix_execution": "module load schrodinger/2021-2-js-aws" + }, + "settings": { + "arguments": { + "parameters": { + "-num-procs": 24 + } + } + }, + "input": { + "compounds": [ + { + "source": "{file_path}/compounds.sdf", + "source_type": "file", + "format": "SDF" + } + ], + "generic": [ + { + "source": "{file_path}/receptor.pdb", + "extension": "pdb" + } + ] + } + }, + { + "step_id": "01_pmx_setup", + "type": "pmx_setup", + "execution": { + "parallelization": { + "cores": 24 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "water": "tip3p", + "forcefield": "amber99sb-star-ildn-mut", + "replicas": 1 + } + }, + "input": { + "generic": [ + { + "source": "fep_setup", + "extension": "log" + }, + { + "source": "{file_path}/receptor.pdb", + "extension": "pdb" + }, + { + "source": "{file_path}/mdppath/", + "extension": "mdp" + } + ], + "compounds": [ + { + "source": "fep_setup", + "source_type": "step" + } + ], + "work_dir": "{output_dir}" + } + }, + { + "step_id": "02_pmx_atomMapping", + "type": "pmx_atomMapping", + "execution": { + "parallelization": { + "cores": 24 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "water": "tip3p", + "forcefield": "amber99sb-star-ildn-mut" + } + }, + "input": { + "perturbation_map": [ + { + "source": "01_pmx_setup" + } + ] + } + }, + { + "step_id": "03_pmx_ligandHybrid", + "type": "pmx_ligandHybrid", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-cs": "spc216.gro" + } + }, + "additional": {} + }, + "input": { + "perturbation_map": [ + { + "source": "02_pmx_atomMapping" + } + ] + } + }, + { + "step_id": "04_assemble_systems", + "type": "pmx_assemble_systems", + "execution": { + "prefix_execution": "module load 
GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + "binary_location": "$PMX_PYTHON /src/pmx/scripts/icolos_entrypoints/", + "parallelization": { + "cores": 24 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "edges": [] + } + }, + "input": { + "perturbation_map": [ + { + "source": "03_pmx_ligandHybrid" + } + ] + } + }, + { + "step_id": "05_box_water_ions", + "type": "pmx_box_water_ions", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + "parallelization": { + "cores": 24 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + }, + "input": { + "perturbation_map": [ + { + "source": "04_assemble_systems" + } + ] + } + }, + { + "step_id": "06_prepare_simulations", + "type": "pmx_prepare_simulations", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + "parallelization": { + "cores": 24 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "sim_type": "em" + } + } + }, + { + "step_id": "06b_run_simulations", + "type": "pmx_run_simulations", + "execution": { + "resource": "slurm", + "resources": { + "partition": "gpu", + "gres": "gpu:1", + "modules": [ + "GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + ] + }, + "parallelization": { + "cores": 16 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "sim_type": "em" + } + } + }, + { + "step_id": "07_prepare_simulations", + "type": "pmx_prepare_simulations", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + "parallelization": { + "cores": 24 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "sim_type": "eq" + } + } + }, + { + "step_id": "07b_run_simulations", + "type": "pmx_run_simulations", + "execution": { + "resource": "slurm", + "resources": { + "partition": "gpu", + "gres": "gpu:1", + "modules": [ + "GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + ] + }, + "parallelization": { + "cores": 16 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "sim_type": "eq" + } + } + }, + { + "step_id": "08_prep_transitions", + "type": "pmx_prepare_transitions", + "execution": { + "prefix_execution": "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + "parallelization": { + "cores": 24 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "sim_type": "transitions" + } + } + }, + { + "step_id": "09_run_transitions", + "type": "pmx_run_simulations", + "execution": { + "resource": "slurm", + "resources": { + "partition": "gpu", + "gres": "gpu:1", + "modules": [ + "GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + ] + }, + "parallelization": { + "cores": 16 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "sim_type": "transitions" + } + }, + "input": { + "generic": [] + } + }, + { + "step_id": "pmx_analyse", + "type": "pmx_run_analysis", + "execution": { + "parallelization": { + "cores": 24 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": {} + } + } + ] + } +} diff --git a/examples/workflow/qm/ePSA_permeability.json b/examples/workflow/qm/ePSA_permeability.json new file mode 100644 index 0000000..6ba4bd8 
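The pmx workflow above offloads its mdrun-heavy steps through the "resource": "slurm" execution blocks, while the orchestrating Icolos process can itself be submitted as a batch job in the style of examples/hpc_script/SLURM_script.sh. A sketch, with all paths illustrative and work_dir passed as an ordinary global variable (an assumption; any {key} referenced by the config can be supplied this way):

    #!/bin/bash -l
    #SBATCH -N 1
    #SBATCH -t 2-00:00:00
    #SBATCH -p core

    source /path/to/miniconda3/bin/activate icolosprod
    python executor.py -conf examples/workflow/pmx/pmx_rbfe.json -debug \
        --global_variables "entrypoint_dir:$(pwd), work_dir:$(pwd)"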
--- /dev/null +++ b/examples/workflow/qm/ePSA_permeability.json @@ -0,0 +1,334 @@ +{ + "workflow": { + "header": { + "id": "ePSA_permeability_example", + "description": "A shortened, simplified version of the ReSCoSS workflow to calculate descriptors for molecules to predict ePSA and permeability values.", + "environment": { + "export": [{ + "key": "XTBHOME", + "value": "/projects/cc/mai/binaries/xtb-6.4.0" + }, { + "key": "XTBPATH", + "value": "${XTBHOME}/share/xtb" + }, { + "key": "PATH", + "value": "${PATH}:${XTBHOME}/bin" + }, { + "key": "PKG_CONFIG_PATH", + "value": "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig" + } + ] + }, + "global_variables": { + "ePSA_model_path": "/projects/cc/mai/material/Icolos/models/ePSA/2020-11-13_DFTB_RF_final.pkl", + "permeability_model_path": "/projects/cc/mai/material/Icolos/models/permeability/2021-03-26_DFTB_permeability_RF_final.pkl" + } + }, + "steps": [ { + "step_id": "initialization", + "type": "initialization", + "input": { + "compounds": [{ + "source": "{entrypoint_dir}/../IcolosData/molecules/paracetamol.sdf", + "source_type": "file", + "format": "SDF" + } + ] + } + }, { + "step_id": "omega_confgen", + "type": "omega", + "execution": { + "prefix_execution": "module load omega" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-maxconfs": 200, + "-rms": 0.8, + "-canonOrder": "false" + } + } + }, + "input": { + "compounds": [{ + "source": "initialization", + "source_type": "step" + } + ] + } + }, { + "step_id": "conf_macromodel", + "type": "macromodel", + "execution": { + "prefix_execution": "module load schrodinger/2019-4" + }, + "token_guard": { + "prefix_execution": "module load schrodinger/2019-4", + "token_pools": { + "MMOD_MACROMODEL": 2 + }, + "wait_interval_seconds": 30, + "wait_limit_seconds": 0 + }, + "settings": { + "arguments": { + "flags": ["-WAIT"], + "parameters": { + "-NJOBS": 1 + } + } + }, + "input": { + "compounds": [{ + "source": "initialization", + "source_type": "step" + } + ] + } + }, { + "step_id": "RMSfiltering1", + "type": "rmsfilter", + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "method": "alignmol", + "threshold": 1 + } + }, + "input": { + "compounds": [{ + "source": "omega_confgen", + "source_type": "step" + }, { + "source": "conf_macromodel", + "source_type": "step" + } + ], + "merge": { + "compounds": true, + "merge_compounds_by": "name", + "enumerations": true, + "merge_enumerations_by": "id" + } + } + }, { + "step_id": "conf_optXTB", + "type": "xtb", + "execution": { + "binary_location": "/projects/cc/mai/binaries/xtb-6.4.0", + "parallelization": { + "cores": 10 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "--opt": "vtight", + "--gbsa": "h2o" + } + } + }, + "input": { + "compounds": [{ + "source": "RMSfiltering1", + "source_type": "step" + } + ] + } + }, { + "step_id": "RMSfiltering2", + "type": "rmsfilter", + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "method": "alignmol", + "threshold": 1 + } + }, + "input": { + "compounds": [{ + "source": "conf_optXTB", + "source_type": "step" + } + ] + } + }, { + "step_id": "turbomole", + "type": "turbomole", + "execution": { + "prefix_execution": "module load turbomole/73", + "failure_policy": { + "n_tries": 5 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "tm_config_dir": "/projects/cc/mai/material/Icolos/turbomole_config", + "tm_config_basename": 
"b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", + "tm_config_cosmo": "/projects/cc/mai/material/Icolos/turbomole_config/cosmoprep_eps80.tm", + "execution_mode": "ridft" + } + }, + "input": { + "compounds": [{ + "source": "RMSfiltering2", + "source_type": "step" + } + ] + } + }, { + "step_id": "cosmo", + "type": "cosmo", + "execution": { + "prefix_execution": "module load COSMOtherm/20.0.0" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": {} + } + }, + "input": { + "compounds": [{ + "source": "turbomole", + "source_type": "step" + } + ] + } + }, { + "step_id": "boltzmann_weighting", + "type": "boltzmann_weighting", + "settings": { + "arguments": { + "flags": [], + "parameters": { + "properties": [{ + "input": "G_h2o", + "output": "boltzfactor_wat" + }, { + "input": "G_meoh", + "output": "boltzfactor_meoh" + }, { + "input": "G_octanol", + "output": "boltzfactor_octanol" + }, { + "input": "G_dmso", + "output": "boltzfactor_dmso" + }, { + "input": "G_cychex", + "output": "boltzfactor_cychex" + }, { + "input": "G_chcl3", + "output": "boltzfactor_chcl3" + }, { + "input": "G_acn", + "output": "boltzfactor_acn" + }, { + "input": "G_thf", + "output": "boltzfactor_thf" + } + ], + "weight": { + "input": ["area", "HB_acc", "volume", "HB_don", "sigma2", "sigma4", "Gsolv_meoh", "Gsolv_h2o", "Gsolv_cychex", "volume"], + "output_prefix": "bf_weighted", + "properties": ["boltzfactor_dmso", "boltzfactor_wat", + "boltzfactor_meoh", "boltzfactor_cychex"] + } + } + } + }, + "input": { + "compounds": [{ + "source": "cosmo", + "source_type": "step" + } + ] + } + }, { + "step_id": "ePSA_prediction", + "type": "prediction", + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "model_path": "{ePSA_model_path}", + "features": ["bf_weighted_volume_boltzfactor_dmso", "bf_weighted_area_boltzfactor_dmso", + "bf_weighted_HB_acc_boltzfactor_dmso", "bf_weighted_HB_don_boltzfactor_dmso", + "bf_weighted_sigma2_boltzfactor_dmso", "bf_weighted_Gsolv_meoh_boltzfactor_dmso"], + "name_predicted": "pred_ePSA" + } + }, + "input": { + "compounds": [{ + "source": "boltzmann_weighting", + "source_type": "step" + } + ] + } + }, { + "step_id": "permeability_prediction", + "type": "prediction", + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "model_path": "{permeability_model_path}", + "features": ["bf_weighted_volume_boltzfactor_wat", "bf_weighted_sigma4_boltzfactor_wat", "bf_weighted_Gsolv_h2o_boltzfactor_wat", + "bf_weighted_HB_don_boltzfactor_wat", "bf_weighted_HB_acc_boltzfactor_wat", "bf_weighted_Gsolv_meoh_boltzfactor_meoh", + "bf_weighted_Gsolv_cychex_boltzfactor_cychex"], + "name_predicted": "pred_permeability" + } + }, + "input": { + "compounds": [{ + "source": "ePSA_prediction", + "source_type": "step" + } + ] + }, + "writeout": [{ + "compounds": { + "category": "conformers" + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/ePSA_permeability_final.sdf", + "type": "file", + "format": "SDF" + } + }, { + "compounds": { + "category": "conformers", + "selected_tags": ["pred_ePSA", "pred_permeability"], + "aggregation": { + "mode": "best_per_compound", + "key": "pred_ePSA" + } + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/ePSA.csv", + "type": "file", + "format": "CSV" + } + } + ] + } + ] + } +} diff --git a/examples/workflow/qm/full_rescoss.json b/examples/workflow/qm/full_rescoss.json new file mode 100644 index 0000000..0e4a157 --- /dev/null +++ b/examples/workflow/qm/full_rescoss.json @@ -0,0 
+1,464 @@ +{ + "workflow": { + "header": { + "id": "rescoss", + "description": "Full ReSCoSS configuration (version 1.0).", + "environment": { + "export": [{ + "key": "XTBHOME", + "value": "/projects/cc/mai/binaries/xtb-6.4.0" + }, { + "key": "XTBPATH", + "value": "${XTBHOME}/share/xtb" + }, { + "key": "PATH", + "value": "${PATH}:${XTBHOME}/bin" + }, { + "key": "PKG_CONFIG_PATH", + "value": "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig" + }, { + "key": "PARA_ARCH", + "value": "MPI" + }, { + "key": "PARNODES", + "value": "6" + } + ] + }, + "global_variables": { + } + }, + "steps": [{ + "step_id": "initialization", + "type": "initialization", + "input": { + "compounds": [ + { + "source": "{entrypoint_dir}/../IcolosData/molecules/aspirin.sdf", + "source_type": "file", + "format": "SDF" + } + ] + } + }, { + "step_id": "omega_confgen", + "type": "omega", + "execution": { + "prefix_execution": "module load omega" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-maxconfs": 200, + "-rms": 0.8, + "-canonOrder": "false" + } + } + }, + "input": { + "compounds": [ + { + "source": "initialization", + "source_type": "step" + } + ] + } + }, { + "step_id": "conf_macromodel", + "type": "macromodel", + "execution": { + "prefix_execution": "module load schrodinger/2019-4" + }, + "token_guard": { + "prefix_execution": "module load schrodinger/2019-4", + "token_pools": { + "MMOD_MACROMODEL": 2 + }, + "wait_interval_seconds": 30, + "wait_limit_seconds": 0 + }, + "settings": { + "arguments": { + "flags": ["-WAIT"], + "parameters": { + "-NJOBS": 1 + } + } + }, + "input": { + "compounds": [ + { + "source": "initialization", + "source_type": "step" + } + ] + } + }, { + "step_id": "RMSfiltering1", + "type": "rmsfilter", + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "method": "alignmol", + "threshold": 1 + } + }, + "input": { + "compounds": [ + { + "source": "omega_confgen", + "source_type": "step" + }, + { + "source": "conf_macromodel", + "source_type": "step" + } + ], + "merge": { + "compounds": true, + "merge_compounds_by": "name", + "enumerations": true, + "merge_enumerations_by": "id" + } + } + }, { + "step_id": "conf_optXTB", + "type": "xtb", + "execution": { + "binary_location": "/projects/cc/mai/binaries/xtb-6.4.0", + "parallelization": { + "cores": 10 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "--opt": "vtight", + "--gbsa": "h2o" + } + } + }, + "input": { + "compounds": [ + { + "source": "RMSfiltering1", + "source_type": "step" + } + ] + } + }, { + "step_id": "RMSfiltering2", + "type": "rmsfilter", + "settings": { + "arguments": { + "flags": [], + "parameters": {} + }, + "additional": { + "method": "alignmol", + "threshold": 1 + } + }, + "input": { + "compounds": [ + { + "source": "conf_optXTB", + "source_type": "step" + } + ] + } + }, { + "step_id": "turbomole", + "type": "turbomole", + "execution": { + "prefix_execution": "module load turbomole/73", + "failure_policy": { + "n_tries": 5 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + } + }, + "additional": { + "tm_config_dir": "/projects/cc/mai/material/Icolos/turbomole_config", + "tm_config_basename": "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", + "tm_config_cosmo": "/projects/cc/mai/material/Icolos/turbomole_config/cosmoprep_eps80.tm", + "execution_mode": "ridft" + } + }, + "input": { + "compounds": [ + { + "source": "RMSfiltering2", + "source_type": "step" + } + ] + } + }, { + "step_id": "cosmo", + "type": "cosmo", + 
"execution": { + "prefix_execution": "module load COSMOtherm/20.0.0" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + } + } + }, + "input": { + "compounds": [ + { + "source": "turbomole", + "source_type": "step" + } + ] + } + }, { + "step_id": "boltzmann_weighting", + "type": "boltzmann_weighting", + "settings": { + "arguments": { + "flags": [], + "parameters": { + "properties": [ + { + "input": "G_h2o", + "output": "boltzfactor_wat" + }, + { + "input": "G_meoh", + "output": "boltzfactor_meoh" + }, + { + "input": "G_octanol", + "output": "boltzfactor_octanol" + }, + { + "input": "G_dmso", + "output": "boltzfactor_dmso" + }, + { + "input": "G_cychex", + "output": "boltzfactor_cychex" + }, + { + "input": "G_chcl3", + "output": "boltzfactor_chcl3" + }, + { + "input": "G_acn", + "output": "boltzfactor_acn" + }, + { + "input": "G_thf", + "output": "boltzfactor_thf" + } + ], + "weight": { + "input": ["area", "HB_acc", "volume", "HB_don", "sigma2", "Gsolv_meoh", "dipole"], + "output_prefix": "bf_weighted", + "properties": ["boltzfactor_dmso", "boltzfactor_wat", + "boltzfactor_meoh", "boltzfactor_cychex"] + } + } + } + }, + "input": { + "compounds": [ + { + "source": "cosmo", + "source_type": "step" + } + ] + } + }, + { + "step_id": "clustering", + "type": "clustering", + "settings": + { + "arguments": + { + "flags": [], + "parameters": + { + "n_clusters": 3, + "max_iter": 300 + } + }, + "additional": + { + "top_n_per_solvent": 3, + "features": ["area", "dipole", "HB_acc", "HB_don"], + "free_energy_solvent_tags": ["G_h2o", "G_meoh", "G_octanol", + "G_dmso", "G_cychex", "G_acn", + "G_thf"] + } + }, + "input": { + "compounds": [ + { + "source": "boltzmann_weighting", + "source_type": "step" + } + ] + } + }, { + "step_id": "turbomole_opt", + "type": "turbomole", + "execution": { + "prefix_execution": "module load turbomole/73", + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-c": 150, + "-gcart": 3 + } + }, + "additional": { + "tm_config_dir": "/projects/cc/mai/material/Icolos/turbomole_config", + "tm_config_basename": "b97-3c-ri-d3-def2-mtzvp-int-charge", + "tm_config_cosmo": "/projects/cc/mai/material/Icolos/turbomole_config/cosmoprep_eps80.tm", + "execution_mode": "jobex" + } + }, + "input": { + "compounds": [ + { + "source": "clustering", + "source_type": "step" + } + ] + }, + "writeout": [{ + "compounds": { + "category": "extra_data", + "key": "cosmo_file" + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/[compound_name]/[conformer_id].cosmo", + "type": "file", + "format": "TXT" + } + }, { + "compounds": { + "category": "extra_data", + "key": "coord_file" + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/[compound_name]/[conformer_id].coord", + "type": "file", + "format": "TXT" + } + }] + }, { + "step_id": "cosmo_opt", + "type": "cosmo", + "execution": { + "prefix_execution": "module load COSMOtherm/20.0.0" + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + } + } + }, + "input": { + "compounds": [ + { + "source": "turbomole_opt", + "source_type": "step" + } + ] + } + }, + { + "step_id": "boltzmann_weighting_opt", + "type": "boltzmann_weighting", + "settings": { + "arguments": { + "flags": [], + "parameters": { + "properties": [ + { + "input": "G_h2o", + "output": "boltzfactor_wat" + }, + { + "input": "G_meoh", + "output": "boltzfactor_meoh" + }, + { + "input": "G_octanol", + "output": "boltzfactor_octanol" + }, + { + "input": "G_dmso", + "output": 
"boltzfactor_dmso" + }, + { + "input": "G_cychex", + "output": "boltzfactor_cychex" + }, + { + "input": "G_chcl3", + "output": "boltzfactor_chcl3" + }, + { + "input": "G_acn", + "output": "boltzfactor_acn" + }, + { + "input": "G_thf", + "output": "boltzfactor_thf" + } + ], + "weight": { + "input": ["area", "HB_acc", "volume", "HB_don", "sigma2", "Gsolv_meoh", "dipole"], + "output_prefix": "bf_weighted", + "properties": ["boltzfactor_dmso", "boltzfactor_wat", + "boltzfactor_meoh", "boltzfactor_cychex"] + } + } + } + }, + "input": { + "compounds": [ + { + "source": "cosmo_opt", + "source_type": "step" + } + ] + }, + "writeout": [ + { + "compounds": { + "category": "conformers" + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/full_rescoss_reweighting_output_opt.sdf", + "type": "file", + "format": "SDF" + } + } + ] + } + ] + } +} \ No newline at end of file diff --git a/examples/workflow/reinvent/feature_counter.json b/examples/workflow/reinvent/feature_counter.json new file mode 100644 index 0000000..6ba23d8 --- /dev/null +++ b/examples/workflow/reinvent/feature_counter.json @@ -0,0 +1,71 @@ +{ + "workflow": { + "header": { + "workflow_id": "Feature counting", + "description": "Feature counting - number of rings.", + "environment": { + }, + "global_variables": { + "input_path_json": "{entrypoint_dir}/../IcolosData/reinvent/small_input.json", + "output_path_json": "{entrypoint_dir}/tests/junk/fc_rings_reinvent.json" + } + }, + "steps": [{ + "step_id": "embed_molecules", + "type": "embedding", + "settings": { + "arguments": { + "flags": [], + "parameters": { + "method": "rdkit" + } + }, + "additional": { + "embed_as": "conformers" + } + }, + "input": { + "compounds": [{ + "source": "{input_path_json}", + "source_type": "file", + "format": "JSON" + + } + ] + } + }, { + "step_id": "feature_count", + "type": "feature_counter", + "settings": { + "additional": { + "feature": "num_rings" + } + }, + "input": { + "compounds": [{ + "source": "embed_molecules", + "source_type": "step" + } + ] + }, + "writeout": [{ + "compounds": { + "category": "conformers", + "selected_tags": ["num_rings"], + "aggregation": { + "mode": "best_per_compound", + "key": "num_rings", + "highest_is_best": true + } + }, + "destination": { + "resource": "{output_path_json}", + "type": "file", + "format": "JSON" + } + } + ] + } + ] + } +} diff --git a/examples/workflow/reinvent/nibr_local_reinvent.json b/examples/workflow/reinvent/nibr_local_reinvent.json new file mode 100644 index 0000000..fad85f9 --- /dev/null +++ b/examples/workflow/reinvent/nibr_local_reinvent.json @@ -0,0 +1,159 @@ +{ + "workflow": { + "header": { + "workflow_id": "NIBR", + "description": "NIBR (local) workflow with returning results in REINVENT JSON format - no use of cloud computing such as AWS.", + "environment": { + "export": [{ + "key": "XTBHOME", + "value": "/projects/cc/mai/binaries/xtb-6.4.0" + }, { + "key": "XTBPATH", + "value": "${XTBHOME}/share/xtb" + }, { + "key": "PATH", + "value": "${PATH}:${XTBHOME}/bin" + }, { + "key": "PKG_CONFIG_PATH", + "value": "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig" + }, { + "key": "PARA_ARCH", + "value": "MPI" + }, { + "key": "PARNODES", + "value": "6" + } + + ] + }, + "global_variables": { + "smiles": "aspirin:O=C(C)Oc1ccccc1C(=O)O" + } + }, + "steps": [{ + "step_id": "Ligprep", + "type": "ligprep", + "execution": { + "prefix_execution": "module load schrodinger/2020-4", + "parallelization": { + "cores": 2, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + 
"settings": { + "arguments": { + "flags": ["-epik"], + "parameters": { + "-ph": 7.0, + "-pht": 2.0, + "-s": 10, + "-bff": 14 + } + }, + "additional": { + } + }, + "input": { + "compounds": [{ + "source": "{smiles}", + "source_type": "string" + } + ] + } + }, { + "step_id": "Glide", + "type": "glide", + "execution": { + "prefix_execution": "module load schrodinger/2021-1-js-aws", + "parallelization": { + "cores": 4, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-HOST": "cpu-only" + } + }, + "additional": { + "configuration": { + "AMIDE_MODE": "trans", + "EXPANDED_SAMPLING": "True", + "GRIDFILE": ["{entrypoint_dir}/../IcolosData/Glide/1UYD_grid_constraints.zip"], + "NENHANCED_SAMPLING": "1", + "POSE_OUTTYPE": "ligandlib_sd", + "POSES_PER_LIG": "3", + "POSTDOCK_NPOSE": "25", + "POSTDOCKSTRAIN": "True", + "PRECISION": "SP", + "REWARD_INTRA_HBONDS": "True" + }, + "maestro_in_file": { + "path": "{entrypoint_dir}/../IcolosData/Glide/example.in" + } + } + }, + "input": { + "compounds": [{ + "source": "Ligprep", + "source_type": "step" + } + ] + } + }, { + "step_id": "Shaep", + "type": "shaep", + "execution": { + "binary_location": "/projects/cc/mai/binaries" + }, + "input": { + "generic": [{ + "source": "{entrypoint_dir}/../IcolosData/panther/1uyd_negative_image.mol2", + "extension": "mol2" + }], + "compounds": [{ + "source": "Glide", + "target_field": "compounds", + "source_type": "step" + } + ] + }, + "writeout": [{ + "compounds": { + "category": "conformers", + "selected_tags": ["shape_similarity", "esp_similarity", "docking_score"] + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/nibr_reinvent/nibr_reinvent_all.csv", + "type": "file", + "format": "CSV" + } + }, { + "compounds": { + "category": "conformers", + "selected_tags": ["shape_similarity", "esp_similarity", "docking_score"], + "aggregation": { + "mode": "best_per_compound", + "key": "shape_similarity", + "highest_is_best": true + } + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/nibr_reinvent/nibr_reinvent.json", + "type": "file", + "format": "JSON" + } + } + ] + } + ] + } +} diff --git a/examples/workflow/rescoring/negative_image_panther.json b/examples/workflow/rescoring/negative_image_panther.json new file mode 100644 index 0000000..1c4bb3f --- /dev/null +++ b/examples/workflow/rescoring/negative_image_panther.json @@ -0,0 +1,58 @@ +{ + "workflow": { + "header": { + "workflow_id": "panther_test_example", + "description": "Panther setup to generate negative image.", + "environment": { + "export": [{ + "key": "XTBHOME", + "value": "/projects/cc/mai/binaries/xtb-6.4.0" + }, { + "key": "XTBPATH", + "value": "${XTBHOME}/share/xtb" + }, { + "key": "PATH", + "value": "${PATH}:${XTBHOME}/bin" + }, { + "key": "PKG_CONFIG_PATH", + "value": "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig" + }, { + "key": "PARA_ARCH", + "value": "MPI" + }, { + "key": "PARNODES", + "value": "6" + } + ] + }, + "global_variables": { + } + }, + "steps": [{ + "step_id": "negative_image_generation", + "type": "panther", + "settings": { + "additional": { + "panther_location": "/projects/cc/mai/binaries/panther", + "panther_config_file": "{entrypoint_dir}/../IcolosData/panther/default_panther.in", + "fields": { + "1-Pdb file": "{entrypoint_dir}/../IcolosData/panther/1UYD_holo_residue_X.pdb" + } + } + }, + "writeout": [ + { + "generic": { + "key": "mol2" + }, + "destination": { + "resource": 
"{entrypoint_dir}/tests/junk/panther/1uyd_panther_negative_image.mol2", + "type": "file", + "format": "txt" + } + } + ] + } + ] + } +} diff --git a/examples/workflow/rescoring/nibr_local.json b/examples/workflow/rescoring/nibr_local.json new file mode 100644 index 0000000..c0d12bf --- /dev/null +++ b/examples/workflow/rescoring/nibr_local.json @@ -0,0 +1,161 @@ +{ + "workflow": { + "header": { + "workflow_id": "NIBR", + "description": "NIBR (local) workflow - no use of cloud computing such as AWS.", + "environment": { + "export": [{ + "key": "XTBHOME", + "value": "/projects/cc/mai/binaries/xtb-6.4.0" + }, { + "key": "XTBPATH", + "value": "${XTBHOME}/share/xtb" + }, { + "key": "PATH", + "value": "${PATH}:${XTBHOME}/bin" + }, { + "key": "PKG_CONFIG_PATH", + "value": "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig" + }, { + "key": "PARA_ARCH", + "value": "MPI" + }, { + "key": "PARNODES", + "value": "6" + } + + ] + }, + "global_variables": { + "smiles": "aspirin:O=C(C)Oc1ccccc1C(=O)O" + } + }, + "steps": [{ + "step_id": "Ligprep", + "type": "ligprep", + "execution": { + "prefix_execution": "module load schrodinger/2020-4", + "parallelization": { + "cores": 2, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": ["-epik"], + "parameters": { + "-ph": 7.0, + "-pht": 2.0, + "-s": 10, + "-bff": 14 + } + }, + "additional": { + } + }, + "input": { + "compounds": [{ + "source": "{smiles}", + "source_type": "string" + } + ] + } + }, { + "step_id": "Glide", + "type": "glide", + "execution": { + "prefix_execution": "module load schrodinger/2021-1-js-aws", + "parallelization": { + "cores": 4, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-HOST": "cpu-only" + } + }, + "additional": { + "configuration": { + "AMIDE_MODE": "trans", + "EXPANDED_SAMPLING": "True", + "GRIDFILE": ["{entrypoint_dir}/../IcolosData/Glide/1UYD_grid_constraints.zip"], + "NENHANCED_SAMPLING": "1", + "POSE_OUTTYPE": "ligandlib_sd", + "POSES_PER_LIG": "3", + "POSTDOCK_NPOSE": "25", + "POSTDOCKSTRAIN": "True", + "PRECISION": "SP", + "REWARD_INTRA_HBONDS": "True" + }, + "maestro_in_file": { + "path": "{entrypoint_dir}/../IcolosData/Glide/example.in" + } + } + }, + "input": { + "compounds": [{ + "source": "Ligprep", + "source_type": "step" + } + ] + } + }, { + "step_id": "Shaep", + "type": "shaep", + "execution": { + "binary_location": "/projects/cc/mai/binaries" + }, + "input": { + "generic": [{ + "source": "{entrypoint_dir}/../IcolosData/panther/1uyd_negative_image.mol2", + "extension": "mol2" + }], + "compounds": [{ + "source": "Glide", + "target_field": "compounds", + "source_type": "step" + } + ] + }, + "writeout": [ + { + "compounds": { + "category": "conformers", + "selected_tags": ["shape_similarity", "esp_similarity"] + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/nibr_final_all.csv", + "type": "file", + "format": "CSV" + } + }, + { + "compounds": { + "category": "conformers", + "selected_tags": ["shape_similarity", "esp_similarity"], + "aggregation": { + "mode": "best_per_compound", + "key": "shape_similarity", + "highest_is_best": true + } + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/nibr_final_bestpercompound.csv", + "type": "file", + "format": "CSV" + } + } + ] + } + ] + } +} diff --git a/examples/workflow/rescoring/rmsd_rescoring.json b/examples/workflow/rescoring/rmsd_rescoring.json new file mode 100644 index 
0000000..1315064 --- /dev/null +++ b/examples/workflow/rescoring/rmsd_rescoring.json @@ -0,0 +1,223 @@ +{ + "workflow": { + "header": { + "workflow_id": "RMSD_rescoring", + "description": "Run RMSD rescoring on docking poses.", + "environment": { + "export": [{ + "key": "XTBHOME", + "value": "/projects/cc/mai/binaries/xtb-6.4.0" + }, { + "key": "XTBPATH", + "value": "${XTBHOME}/share/xtb" + }, { + "key": "PATH", + "value": "${PATH}:${XTBHOME}/bin" + }, { + "key": "PKG_CONFIG_PATH", + "value": "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig" + }, { + "key": "PARA_ARCH", + "value": "MPI" + }, { + "key": "PARNODES", + "value": "6" + } + + ] + }, + "global_variables": { + "smiles": "aspirin:O=C(C)Oc1ccccc1C(=O)O" + } + }, + "steps": [{ + "step_id": "Ligprep", + "type": "ligprep", + "execution": { + "prefix_execution": "module load schrodinger/2020-4", + "parallelization": { + "cores": 2, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": ["-epik"], + "parameters": { + "-ph": 7.0, + "-pht": 2.0, + "-s": 10, + "-bff": 14 + } + }, + "additional": {} + }, + "input": { + "compounds": [{ + "source": "{smiles}", + "source_type": "string" + } + ] + } + }, { + "step_id": "Glide", + "type": "glide", + "execution": { + "prefix_execution": "module load schrodinger/2021-1-js-aws", + "parallelization": { + "cores": 4, + "max_length_sublists": 1 + }, + "failure_policy": { + "n_tries": 3 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "-HOST": "cpu-only" + } + }, + "additional": { + "configuration": { + "AMIDE_MODE": "trans", + "EXPANDED_SAMPLING": "True", + "GRIDFILE": [ + "{entrypoint_dir}/../IcolosData/Glide/1UYD_grid_constraints.zip" + ], + "NENHANCED_SAMPLING": "1", + "POSE_OUTTYPE": "ligandlib_sd", + "POSES_PER_LIG": "3", + "POSTDOCK_NPOSE": "25", + "POSTDOCKSTRAIN": "True", + "PRECISION": "SP", + "REWARD_INTRA_HBONDS": "True" + } + } + }, + "input": { + "compounds": [{ + "source": "Ligprep", + "source_type": "step" + } + ] + } + }, { + "step_id": "compound_filter", + "type": "data_manipulation", + "settings": { + "additional": { + "action": "filter", + "filter_level": "compounds", + "criteria": "docking_score", + "return_n": 1, + "highest_is_best": false + } + }, + "input": { + "compounds": [{ + "source": "Glide", + "source_type": "step" + } + ] + } + }, { + "step_id": "xtb", + "type": "xtb", + "execution": { + "binary_location": "/projects/cc/mai/binaries/xtb-6.4.0", + "parallelization": { + "cores": 4 + } + }, + "settings": { + "arguments": { + "flags": [], + "parameters": { + "--opt": "vtight", + "--gbsa": "h2o" + } + } + }, + "input": { + "compounds": [{ + "source": "compound_filter", + "source_type": "step" + } + ] + }, + "writeout": [{ + "compounds": { + "category": "conformers" + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/rmsd_rescoring_xtb.sdf", + "type": "file", + "format": "SDF" + } + } + ] + }, { + "step_id": "data_manipulation", + "type": "data_manipulation", + "settings": { + "additional": { + "action": "attach_conformers_as_extra", + "source": "xtb" + } + }, + "input": { + "compounds": [{ + "source": "compound_filter", + "source_type": "step" + } + ] + } + }, { + "step_id": "rmsd", + "type": "rmsd", + "settings": { + "additional": { + "method": "alignmol" + } + }, + "input": { + "compounds": [{ + "source": "data_manipulation", + "source_type": "step" + } + ] + }, + "writeout": [{ + "compounds": { + "category": "conformers" + }, + "destination": { + "resource": 
"{entrypoint_dir}/tests/junk/rmsd_rescoring.sdf", + "type": "file", + "format": "SDF" + } + }, { + "compounds": { + "category": "conformers", + "selected_tags": ["docking_score", "rmsd", "grid_id"], + "aggregation": { + "mode": "best_per_compound", + "key": "docking_score" + } + }, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/rmsd_rescoring.csv", + "type": "file", + "format": "CSV" + } + } + ] + } + ] + } +} diff --git a/executor.py b/executor.py new file mode 100644 index 0000000..290ecae --- /dev/null +++ b/executor.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +# coding=utf-8 + +import os +import sys +import json +import argparse +from datetime import datetime +from icolos.core.composite_agents.workflow import WorkFlow + +from icolos.loggers.entrypoint_logger import EntryPointLogger + +from icolos.utils.enums.composite_agents_enums import WorkflowEnum +from icolos.utils.enums.logging_enums import LoggingConfigEnum +from icolos.utils.enums.entry_points import ExecutorEnum + +from icolos.utils.entry_point_functions.logging_helper_functions import ( + initialize_logging, +) +from icolos.utils.entry_point_functions.parsing_functions import parse_header +from icolos.utils.general.files_paths import attach_root_path + + +if __name__ == "__main__": + + # enums + _LE = LoggingConfigEnum() + _EE = ExecutorEnum() + _WE = WorkflowEnum() + + # initialize logger + logger = EntryPointLogger() + + # get the input parameters and parse them + parser = argparse.ArgumentParser( + description='Implements entry point for the "Icolos" workflow class.' + ) + parser.add_argument( + "-conf", + type=str, + default=None, + help="A path to an workflow's configuration file (JSON dictionary) that is to be executed.", + ) + parser.add_argument( + "-debug", + action="store_true", + help='Set this flag to activate the inbuilt debug logging mode (this will overwrite parameter "-log_conf", if set).', + ) + parser.add_argument( + "--global_variables", + nargs="+", + default=None, + type=str, + help='List of strings, setting global variables with key and value, e.g. "root:/path/to/root".', + ) + parser.add_argument( + "--global_settings", + nargs="+", + default=None, + type=str, + help='List of strings, setting global settings with key and value, e.g. "remove_temporary_files:False, single_directory:True".', + ) + args, args_unk = parser.parse_known_args() + + if args.conf is None or not os.path.isfile(args.conf): + raise Exception( + 'Parameter "-conf" must be a relative or absolute path to a configuration (JSON) file.' + ) + + # load configuration + with open(args.conf) as file: + conf = file.read().replace("\r", "").replace("\n", "") + conf = json.loads(conf) + + # set the logging configuration according to parameters + log_conf = attach_root_path(_LE.PATH_CONFIG_DEFAULT) + if args.debug: + log_conf = attach_root_path(_LE.PATH_CONFIG_DEBUG) + logger = initialize_logging(log_conf_path=log_conf, workflow_conf=conf) + + # update global variables and settings + conf = parse_header( + conf=conf, args=args, entry_point_path=os.path.realpath(__file__), logger=logger + ) + + # generate workflow object + workflow = WorkFlow(**conf[_WE.WORKFLOW]) + workflow.initialize() + + # execute the whole workflow + st_time = datetime.now() + workflow.execute() + exec_time = datetime.now() - st_time + logger.log(f"Icolos workflow completed. 
Walltime: {exec_time}.", _LE.INFO) + sys.exit(0) diff --git a/external_documentation/REINVENT_input.json b/external_documentation/REINVENT_input.json new file mode 100644 index 0000000..84b08ee --- /dev/null +++ b/external_documentation/REINVENT_input.json @@ -0,0 +1,4 @@ +{ + "names": ["0", "1", "3"], + "smiles": ["C#CCCCn1", "CCCCn1c", "CC(C)(C)CCC1(c2"] +} \ No newline at end of file diff --git a/external_documentation/REINVENT_result.json b/external_documentation/REINVENT_result.json new file mode 100644 index 0000000..86894dd --- /dev/null +++ b/external_documentation/REINVENT_result.json @@ -0,0 +1,9 @@ +{ + "results": [ + { + "values_key": "score", + "values": [1.0, 2.1, 3.2, "", "", 4.3, 7.0] + } + ], + "names": ["mol1", "mol2", "mol3", "mol4", "mol5", "mol6", "ref_mol"] +} \ No newline at end of file diff --git a/external_documentation/fep_mapper.txt b/external_documentation/fep_mapper.txt new file mode 100644 index 0000000..6636444 --- /dev/null +++ b/external_documentation/fep_mapper.txt @@ -0,0 +1,102 @@ +# Version 2020-4 + +Command line: $SCHRODINGER/run -FROM scisol fep_mapper.py -full-help +usage: fep_mapper.py [-h] [-v] [-full-help] [-o BASENAME] [-s BASENAME] [-x FILENAME] [-r RECEPTOR] [-e N] + [-rha RECEPTOR_HOTATOMS_ASL] [-ligand-hotatoms-rule-complex LIGAND_HOTATOMS_RULE_COMPLEX] + [-ligand-hotatoms-rule-solvent LIGAND_HOTATOMS_RULE_SOLVENT] [-t TOPOLOGY] [-b BIAS] [-add-bias ADD_BIAS] + [-C CUTOFF] [-simi-cutoff SIMI_CUTOFF] [-num-procs NUM_PROCS] [-align-core-only] [-rule RULE] [-simiscore] + [-force-optimize] [-connect-disconnected-nodes] [-generate-neutral-intermediates] [-clear-predicted-ddg] + [-m ATOM_MAPPING] [-strict-matching] [-extend-mapping] + [-mapping-scheme {position,position_and_bonding,protein_by_residue}] [-debug] [-mp METALLOPROTEIN] [-ats] + [-ats-min-barrier-height ATS_MIN_BARRIER_HEIGHT] [-ats-max-bond-dist ATS_MAX_BOND_DIST] + [-ats-max-core-reduction ATS_MAX_CORE_REDUCTION] + [structure_or_graph] + +Popular examples: + +- Generate optimized-topology graph: + + $SCHRODINGER/run -FROM scisol fep_mapper.py foo.mae -o foo + +- Generate optimized-topology graph with custom core: + + $SCHRODINGER/run -FROM scisol fep_mapper.py foo.mae -o foo -m "CC(=O)NCC(=O)NC" + +- Generate star-topology graph with custom core: + + $SCHRODINGER/run -FROM scisol fep_mapper.py foo.mae -o foo -m "CC(=O)NCC(=O)NC" -t star + +positional arguments: + structure_or_graph Structure file in Pose Viewer format (_pv.mae) or graph (.fmp) file. If .fmp file is given, the graph + optimization is only performed with -force-optimize option. + +optional arguments: + -h, --help show this help message and exit + -v, -version show program's version number and exit + -full-help List all available options. + -o BASENAME, -output BASENAME + output files' base name. Files to be written: .edge, .fmp. + -s BASENAME, -siminp BASENAME + simulation input files' base name. When this option is specified, a number of input files for FEP simulations + will be written out. + -x FILENAME, -extend FILENAME + extend the graph as saved in file FILENAME. + -r RECEPTOR, --receptor RECEPTOR + -receptor is DEPRECATED: Please specify -environment + -e N, -environment N specify the initial N structures as the common environment structures. This option is needed when you want to + write out structure input files for relative binding free energy calculations. + -rha RECEPTOR_HOTATOMS_ASL, -receptor-hotatoms-asl RECEPTOR_HOTATOMS_ASL + ASL expression to specify receptor hot atoms. 
Ligand hot atoms are reset to default unless '-ligand-hotatoms-rule-complex' and '-ligand-hotatoms-rule-solvent' are specified
+ -ligand-hotatoms-rule-complex LIGAND_HOTATOMS_RULE_COMPLEX
+ REST rule for ligand in complex leg. Must be used with -receptor-hotatoms-asl option.
+ -ligand-hotatoms-rule-solvent LIGAND_HOTATOMS_RULE_SOLVENT
+ REST rule for ligand in solvent leg. Must be used with -receptor-hotatoms-asl option.
+ -t TOPOLOGY, -topology TOPOLOGY
+ Graph topology type, available options: ['full', 'normal', 'star', 'windmill']. Default is 'normal'
+ -b BIAS, -bias BIAS A 'bias' value will result in a graph with biased nodes (hubs), which correspond to structures with nonzero
+ values of the CT-level property 'i_fepmapper_bias'.
+ -add-bias ADD_BIAS Tag the given ligand as bias.
+ -C CUTOFF, -cutoff CUTOFF
+ Specifies the maximum number of unmapped atoms between the two structures in the edge. If the number of
+ unmapped atoms is greater than CUTOFF, the similarity score of the edge is zero. Note that higher
+ similarity-score cutoffs correspond to smaller values of this option. Default: No cutoff. Also see -simi-cutoff
+ -simi-cutoff SIMI_CUTOFF
+ Specifies the minimum similarity score. Edges with lower similarity scores will be deleted. Default: 0 (no
+ cutoff)
+ -num-procs NUM_PROCS Number of processes used for graph optimization. Default is to use 1 CPU core
+ -align-core-only do not adjust the non-core atoms when aligning the core atoms.
+ -rule RULE specify custom rules for similarity-score calculation. Default value is:
+ 'Mcs,Charge,SoftBond,MinimumNumberOfAtom,SnapCoreRmsd,BidirectionSnapCore'.
+ -simiscore print out a detailed account of similarity scores for all pairs.
+ -force-optimize If a graph file is given, it will be re-optimized if this option is provided
+ -connect-disconnected-nodes
+ Only optimize non-edge nodes. This option only works if an fmp file is given
+ -generate-neutral-intermediates
+ Add neutral ligand between core hopping and charged ligand. This option is currently only available for star
+ graphs
+ -clear-predicted-ddg, -ignore-ddg
+ Clear existing predicted ddG values.
+ -m ATOM_MAPPING, -atom-mapping ATOM_MAPPING
+ Specify custom core with SMARTS pattern(s). Multiple SMARTS patterns should be separated with space, e.g.,
+ `-m "CCN CCO"`. Note: Each molecule should ideally match only one SMARTS. If a molecule matches more than 1
+ SMARTS, the longest match will be chosen, and this match should be unique, otherwise matching fails due to
+ ambiguity. This option can be used with -extend-mapping.
+ -strict-matching If set, unsuccessful core smarts matching on a molecule will eliminate all edges from this molecule. If not
+ set, all atoms will be used for matching in case of unsuccessful core smarts matching.
+ -extend-mapping If given, the match will be allowed as an extension of the SMARTS pattern; if false, the match should be just
+ the SMARTS pattern (a warning is issued when a unique match cannot be found).
+ -mapping-scheme {position,position_and_bonding,protein_by_residue}
+ Mechanism for mapping atoms between structures: position: simple position-based mapping;
+ position_and_bonding: approximate position and bonding based mapping; protein_by_residue: residue mapping for
+ selectivity FEP. Default = position_and_bonding
+ -debug
+ -mp METALLOPROTEIN, -metalloprotein METALLOPROTEIN
+ write out siminp files for metalloprotein workflow.
+ -ats perform automated torsional scaling
+ -ats-min-barrier-height ATS_MIN_BARRIER_HEIGHT
+ min barrier height for flagging torsions to scale (default: 8.0)
+ -ats-max-bond-dist ATS_MAX_BOND_DIST
+ max bond dist from mutations for flagging torsions to scale (default: 3)
+ -ats-max-core-reduction ATS_MAX_CORE_REDUCTION
+ max permitted number of atoms removed from core due to ats (default: 10)
diff --git a/external_documentation/fep_plus.txt b/external_documentation/fep_plus.txt
new file mode 100644
index 0000000..c5db7d1
--- /dev/null
+++ b/external_documentation/fep_plus.txt
@@ -0,0 +1,109 @@
+# Version 2020-4
+
+usage:
+* Run a new job:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME
+
+* Run a new job with custom workflow:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME -m
+
+* Restart a previously interrupted job:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME -RESTART -checkpoint
+
+* Extend production simulations for certain edges:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME -extend -checkpoint
+ An example for the format of an edge-file:
+ 36da5ad:397128e
+ 33dd5ad:347118e
+ 33fe5ad:3171f8e
+ Each line specifies an edge with the two nodes' IDs. Each node ID is a hex
+ number of at least 7 digits. The two IDs are separated by a ':' (or '-' or
+ '_').
+
+* Prepare input files for multisim. Do NOT run job:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME -prepare
+
+* Run a protein residue mutation job:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME -protein -solvent_asl
+ follows the same format as used by $SCHRODINGER/run residue_scanning_backend.py -muts_file
+
+* Run a protein stability job:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME -protein
+
+* Add mutations to a complete protein fep job:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME -protein -expand_protein
+
+* Run a metalloprotein FEP job:
+ $SCHRODINGER/fep_plus -HOST -SUBHOST -JOBNAME -mp
+
+positional arguments:
+ inp_file An fmp or a pv structure file
+
+optional arguments:
+ -h, --help show this help message and exit
+ -m Use custom workflow instead of the auto-generated one.
+ -ff {OPLS_2005|OPLS3e}
+ Specify the forcefield to use. Default: OPLS3e.
+ -seed Specify seed of pseudorandom number generator for initial atom velocities. Default: 2014
+ -ppj PPJ Specify number of processors per job. Default: 4.
+ -checkpoint
+ Specify the multisim checkpoint file.
+ -prepare Do not run job. Only prepare multisim input files.
+ -JOBNAME Specify the job name.
+ -buffer Specify a larger buffer size (in Angstroms). Defaults: 5 in complex leg; 5 in solvent leg of protein-residue-mutation FEP;
+ 10 in solubility FEP; 10 in solvent leg of other types of FEP. The custom value will be used only if it's greater than the
+ corresponding default values.
+ -maxjob Maximum number of simultaneous subjobs. Default: 0 (unlimited)
+ -lambda-windows , -lambda_windows
+ Number of lambda windows for the default protocol. Default: 12
+ -ensemble {muVT|NPT|NVT}
+ Specify the ensemble class. Default: muVT.
+ -time Specify the production-simulation time (in ps). For extension, this option specifies the additional simulation time (in
+ ps). Default: 5000.0. Min value: 500.0.
+ -protein Generate and run protein residue mutation if a mutation_file is given here and a solvent_asl is also provided; Generate and
+ run protein stability when a mutation_file is given here and no solvent_asl is provided
+ -mp []
+ Generate and run metalloprotein workflow.
+ -solvent-asl SOLVENT_ASL, -solvent_asl SOLVENT_ASL
+ Specify ASL to put in solvent leg for protein residue mutation
+ -vacuum Include vacuum simulations. Only supported for small molecule FEP.
+ -extend Extend production simulations of specified edges.
+ -atom-mapping , -atom_mapping
+ Atom mapping specification for leadoptmap.py. For small molecule FEP, specify SMARTS string to customize core assignment;
+ for protein residue mutation FEP, 'sidechain' is the only argument allowing the side chain atoms to be mapped as well while
+ by default the side chains are not mapped. This option will be ignored if fmp file is provided as input.
+ -modify-dihe, -modify_dihe
+ Modify retained dihedral angle interactions for customized core.
+ -no-h-mass, -no_h_mass
+ Turn off hydrogen mass repartitioning (on by default).
+ -membrane Indicates the model system is a membrane protein system, such as the GPCR.
+ -charged-lambda-windows , -charged_lambda_windows
+ Number of lambda windows for the charge protocol. Default: 24
+ -core-hopping-lambda-windows , -core_hopping_lambda_windows
+ Number of lambda windows for the core-hopping protocol. Default: 16
+ -residue-structure , -residue_structure
+ Noncanonical amino acids for protein mutation.
+ -expand-protein EXPAND_PROTEIN, -expand_protein EXPAND_PROTEIN
+ Pass the structure file for protein fep to re-run with additional mutations.
+ -water Specify the water model used in the simulations. Valid values: SPC, TIP3P, TIP4P, TIP4PEW, TIP4PD, TIP5P. Default: SPC
+ -custom-charge-mode , -custom_charge_mode
+ Set the custom charge calculation mode when using the OPLS3e forcefield. Default is to 'assign' custom charges based on the
+ input geometries. Set to 'clear' to clear custom charges without assigning them. Set to 'keep' to keep existing custom charge
+ parameters.
+ -skip-leg []
+ Specify the legs to skip (complex/solvent/vacuum). Pass in multiple times to skip multiple legs
+
+Job Control Options:
+ -HOST Run job remotely on the indicated host entry.
+ -WAIT Do not return a prompt until the job completes.
+ -LOCAL Do not use a temporary directory for job files. Keep files in the current directory.
+ -D, -DEBUG Show details of Job Control operation.
+ -TMPDIR TMPDIR The name of the directory used to store files temporarily during a job.
+ -SAVE Return zip archive of job directory at job completion.
+ -OPLSDIR OPLSDIR Specifies directory for custom forcefield parameters.
+
+Standard Options:
+ -SUBHOST or -SUBHOST or -SUBHOST "hostname1:nproc1 ... hostnameN:nprocN"
+ Run the subjobs on the specified hosts. The driver is run on the host specified with -HOST.
+ -RETRIES RETRIES If a subjob fails for any reason, it will be retried RETRIES times.
+ -RESTART Restart a previously failed job, utilizing any already completed subjobs.
diff --git a/external_documentation/prime_arguments.txt b/external_documentation/prime_arguments.txt
new file mode 100644
index 0000000..093d046
--- /dev/null
+++ b/external_documentation/prime_arguments.txt
@@ -0,0 +1,180 @@
+$SCHRODINGER/prime_mmgbsa []
+
+ Any option can be placed in an input file for ease of use. The input
+ structure should be specified on a line using "STRUCT_FILE ".
+ All other options should be placed one-per-line with the preceding dash
+ removed.
For example, the following two calculations are equivalent:
+
+ > prime_mmgbsa file_pv.mae -job_type ENERGY -lcons SMARTS.C
+
+ or
+
+ > prime_mmgbsa input.inp
+
+ where the contents of input.inp are:
+
+ STRUCT_FILE file_pv.mae
+ JOB_TYPE ENERGY
+ LCONS SMARTS.C
+
+ Note that HOST, SUBHOST and NICE flags must be set via the commandline.
+
+ All atoms in the ligand will have the i_psp_Prime_MMGBSA_Ligand property
+ set to 1, so that asl expressions can then be used for specifying parts of
+ the structure relative to the ligand. For example,
+ "fillres within 5 (atom.i_psp_Prime_MMGBSA_Ligand 1)"
+ selects all residues within 5 Angstroms of the ligand.
+
+ SMARTS expressions can be included in all asl expressions. The command
+ -lcons SMARTS.C will apply constraints to all aliphatic carbons in the ligand.
+
+ run $SCHRODINGER/prime_mmgbsa -h for a complete listing of all options.
+
+
+positional arguments:
+ struct_file For most situations, this should be a Maestro file
+ with the receptor as the first entry, followed by the
+ ligand poses (e.g. a Glide pose viewer file). If the
+ -ligand option is specified, then the input should
+ instead be a Maestro file with each entry containing a
+ protein-ligand complex.
+
+optional arguments:
+ -h, --help show this help message and exit
+ -report_prime_log REPORT_PRIME_LOG
+ (yes/no) Return an output file with the extension
+ .Prime.log with the Prime logfiles of all component
+ jobs. By default this is included if fewer than 100
+ ligands are used
+ -csv_output CSV_OUTPUT
+ (yes/no) Return a csv format output file -out.csv with
+ the calculated energies.
+ -report_top REPORT_TOP
+ Report the specified number of top-scoring ligands in
+ the log file. All ligands are reported in the CSV and
+ structure output files.
+ -v show program's version number and exit
+ -jobname JOBNAME, -JOBNAME JOBNAME, -j JOBNAME
+ Set the base name of outputs
+ -restart_file RESTART_FILES
+ Output of a partially completed subjob (this keyword
+ can be used multiple times if there are
+ multiple completed subjobs)
+ -RESTART Guess names of restart files for -restart_file option
+
+Input:
+ -ligand LIGAND_ASL Specify the ligand with an asl expression (required
+ for trajectory processing). If this option is present
+ then the input should be a maestro input file with
+ each entry containing both the ligand and the
+ receptor. The asl expression provided here will be
+ used to determine which part of the complex structure
+ is the ligand.
+
+Output:
+ -out_type {PV,COMPLEX,LIGAND,FLEXIBLE,COMPLETE}
+ The type of Maestro file to output. Choices are PV,
+ COMPLEX, LIGAND, FLEXIBLE, and COMPLETE. LIGAND
+ produces a ligand-only file. PV will produce a
+ combination of the input receptor and the optimized
+ ligand positions. COMPLEX will return the optimized
+ ligand and receptor conformations. FLEXIBLE outputs
+ the optimized conformations of the flexible portions.
+ Please note that this is not a full protein structure
+ and cannot be used for any subsequent calculations. Of
+ these, only COMPLEX will return the full optimized
+ receptor conformation. PV files are default if
+ inputting a PV file, COMPLEX files are the default if
+ inputting a series of complexes. COMPLETE adds the
+ optimized free receptor and ligand structures to the
+ complex output file
+
+Molecular Mechanics (PRIME):
+ -receptor_region RSEL_ASL, -rsel RSEL_ASL
+ Designate a region of the receptor as flexible using
+ an asl expression.
Expressions can refer to atoms in
+ the ligand or the receptor and the selected region is
+ the union of all the regions for each ligand-receptor
+ pair in the input PV file. By default the entire
+ receptor is frozen.
+ -rflexdist FLEXDIST, -flexdist FLEXDIST
+ Treat all residues within this distance of the ligand
+ as flexible. By default the entire receptor is frozen.
+ (overwrites -receptor_region flag)
+ -rflexgroup {residue,side,polarh}
+ Select a portion of the region defined with rflexdist
+ flag to be flexible. residue: Choose the entire
+ residue. side: Choose the sidechain of each residue.
+ polarh: Choose the polar hydrogens on each residue.
+ -target_flexibility Run a two-stage MMGBSA calculation where the second
+ stage runs with the subset of flexible protein
+ residues identified by the first
+ -target_flexibility_cutoff TARGET_FLEXIBILITY_CUTOFF
+ Cutoff for determining movement for target flexibility
+ in Angstroms
+ -ligand_region LSEL_ASL, -lsel LSEL_ASL
+ Choose a section of the ligand to be treated as
+ flexible. By default the entire ligand is flexible.
+ -job_type {ENERGY,REAL_MIN,SIDE_PRED,SIDE_COMBI,SITE_OPT,PGL}
+ Prime jobtype to use to sample flexible regions.
+ Setting this option multiple times will result in
+ multiple sampling algorithms being used. Options are:
+ REAL_MIN (default): Local minimization. ENERGY: No
+ sampling, just calculate a single-point energy.
+ SIDE_PRED: Optimize sidechains using Prime sidechain
+ prediction. SIDE_COMBI: Optimize sidechain using
+ Combinatorial Sidechain Prediction. (Limited to <5
+ sidechains). SITE_OPT: Run a binding-site optimization
+ consisting of prime sidechain predictions and
+ minimizations designed specifically for predicting
+ induced fit effects. PGL: Run a Prime PGL Binding-Site
+ Optimization on all flexible regions. See the manual
+ for more details on this protocol.
+ -rigid_body Minimize the ligand as a rigid body
+ -num_output_struct NUM_OUTPUT_STRUCT
+ The maximum number of poses to return per compound.
+ This will only be relevant if multiple job types are
+ selected or job types that return multiple outputs are
+ used.
+ -lcons LCONS Select a portion of the ligand to harmonically
+ constrain with an ASL expression. By default no
+ constraints are used.
+ -rcons RCONS Select a portion of the receptor to harmonically
+ constrain with an ASL expression. By default no
+ constraints are used.
+ -str_cons STR_CONS Strength of Receptor and Ligand Constraints in
+ kcals/mol/A^2. The default value is 1.0 kcal/mol/A^2
+ -fbw_cons FBW_CONS Width of flat bottom potential for constraints in A.
+ The default value is 0.0 A
+ -prime_opt PRIME_OPTIONS
+ Pass any keyword value pair to the Prime Refinement
+ stage in the form keyword=value. See the "Refining
+ Protein Structures" chapter in the Prime User Manual
+ for a description of available options. If you would
+ like to change the force field from its default value
+ (OPLS3e if the proper license is present) use
+ -prime_opt OPLS_VERSION=OPLS_2005
+ -use_ligand_charges Use the partial charges in the input ligand file.
+ -frozen Do not treat any part of the ligand or receptor as
+ flexible. This overwrites the -ligand_region and
+ -receptor_region flags.
+ -membrane Use Prime implicit membrane model (must be set up in
+ receptor file through Maestro)
+
+Watermap:
+ -watermap WATERMAP_FN, -WATERMAP WATERMAP_FN
+ Score ligands against this watermap. Input should be a
+ ct file containing the watermap generated with the
+ current version of the Schrodinger suite.
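+
+ A minimal combined invocation (a sketch for orientation only; the pose file
+ and job name below are placeholders, and every option used is documented
+ above):
+
+ > prime_mmgbsa poses_pv.mae -job_type SIDE_PRED -flexdist 5.0 -jobname mmgbsa_flex -csv_output yes
+
+ This optimizes the sidechains of all residues within 5 Angstroms of each
+ ligand pose and writes the calculated energies to a CSV file alongside the
+ structure output.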
+ +Job Control Options: + -HOST Run job remotely on the indicated host entry. + -WAIT Do not return a prompt until the job completes. + -D, -DEBUG Show details of Job Control operation. + -NOJOBID Run the job directly, without Job Control layer. + +Standard Options: + -NJOBS NJOBS Divide the overall job into NJOBS subjobs. + -RETRIES RETRIES If a subjob fails for any reason, it will be retried + RETRIES times. (Default: 3) + -NOLAUNCH Set up subjob inputs, but don't run the jobs. diff --git a/icolos/__init__.py b/icolos/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/config/amber/default_mmpbsa.in b/icolos/config/amber/default_mmpbsa.in new file mode 100644 index 0000000..843bebf --- /dev/null +++ b/icolos/config/amber/default_mmpbsa.in @@ -0,0 +1,6 @@ +&general +startframe=0, endframe=5000000000, verbose=2, +/ +&gb +igb=5, saltcon=0.150, +/ diff --git a/icolos/config/cosmo/default_cosmo.config b/icolos/config/cosmo/default_cosmo.config new file mode 100644 index 0000000..c7dcf71 --- /dev/null +++ b/icolos/config/cosmo/default_cosmo.config @@ -0,0 +1,28 @@ +ctd = BP_TZVPD_FINE_20.ctd cdir = "/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/CTDATA-FILES" ldir = "/opt/scp/software/COSMOtherm/20.0.0/licensefiles" +unit notempty wtln ehfile +!! generated by COSMOthermX !! +f = mol.cosmo +f = "h2o_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/h" VPfile +f = "methanol_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/m" VPfile +f = "1-octanol_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" Comp = "1-octanol" [ VPfile +f = "1-octanol_c1.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c2.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c3.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c4.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c5.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c6.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" ] +f = "dimethylsulfoxide_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/d" VPfile +f = "cyclohexane_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/c" VPfile +f = "chcl3_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/c" VPfile +f = "propanone_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/p" VPfile +f = "acetonitrile_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/a" VPfile +f = "thf_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/t" VPfile +henry= 2 tc=25.0 GSOLV +henry= 3 tc=25.0 GSOLV +henry= 4 tc=25.0 GSOLV +henry= 5 tc=25.0 GSOLV +henry= 6 tc=25.0 GSOLV +henry= 7 tc=25.0 GSOLV +henry= 8 tc=25.0 GSOLV +henry= 9 tc=25.0 GSOLV +henry= 10 tc=25.0 GSOLV \ No newline at end of file diff --git a/icolos/config/desmond/config.msj b/icolos/config/desmond/config.msj new file mode 100644 index 0000000..babcf80 --- /dev/null +++ b/icolos/config/desmond/config.msj @@ -0,0 +1,23 @@ +task { + task = "desmond:auto" +} + 
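+# Build the solvated system: an orthorhombic box with a 10 A buffer around the
+# solute, TIP3P water, Cl counterions added until the net charge is zero, and
+# the S-OPLS force field (assigned in the stages below).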
+build_geometry { + add_counterion = { + ion = Cl + number = neutralize_system + } + box = { + shape = orthorhombic + size = [10.0 10.0 10.0 ] + size_type = buffer + } + override_forcefield = S-OPLS + rezero_system = false + solvent = TIP3P +} + +assign_forcefield { + forcefield = S-OPLS + water = TIP3P +} \ No newline at end of file diff --git a/icolos/config/desmond/prod.cfg b/icolos/config/desmond/prod.cfg new file mode 100644 index 0000000..32c426f --- /dev/null +++ b/icolos/config/desmond/prod.cfg @@ -0,0 +1,69 @@ +annealing = false +backend = { +} +bigger_rclone = false +checkpt = { + first = 0.0 + interval = 240.06 + name = "$JOBNAME.cpt" + write_last_step = true +} +cpu = 1 +cutoff_radius = 9.0 +elapsed_time = 0.0 +energy_group = false +eneseq = { + first = 0.0 + interval = 1.2 + name = "$JOBNAME$[_replica$REPLICA$].ene" +} +ensemble = { + barostat = { + tau = 2.0 + } + class = NPT + method = MTK + thermostat = { + tau = 1.0 + } +} +glue = solute +maeff_output = { + first = 0.0 + interval = 120.0 + name = "$JOBNAME$[_replica$REPLICA$]-out.cms" + periodicfix = true + trjdir = "$JOBNAME$[_replica$REPLICA$]_trj" +} +meta = false +meta_file = ? +pressure = [1.01325 isotropic ] +randomize_velocity = { + first = 0.0 + interval = inf + seed = 2007 + temperature = "@*.temperature" +} +restrain = none +simbox = { + first = 0.0 + interval = 1.2 + name = "$JOBNAME$[_replica$REPLICA$]_simbox.dat" +} +surface_tension = 0.0 +taper = false +temperature = [ + [300.0 0 ] +] +time = 10.0 +timestep = [0.002 0.002 0.006 ] +trajectory = { + center = [] + first = 0.0 + format = dtr + frames_per_file = 250 + interval = 2.0 + name = "$JOBNAME$[_replica$REPLICA$]_trj" + periodicfix = true + write_velocity = false +} \ No newline at end of file diff --git a/icolos/config/desmond/production.msj b/icolos/config/desmond/production.msj new file mode 100644 index 0000000..358ca55 --- /dev/null +++ b/icolos/config/desmond/production.msj @@ -0,0 +1,102 @@ +task { + task = "desmond:auto" + set_family = { + desmond = { + checkpt.write_last_step = no + } + } +} + +simulate { + title = "Brownian Dynamics NVT, T = 10 K, small timesteps, and restraints on solute heavy atoms, 100ps" + annealing = off + time = 100 + timestep = [0.001 0.001 0.003 ] + temperature = 10.0 + ensemble = { + class = "NVT" + method = "Brownie" + brownie = { + delta_max = 0.1 + } + } + restrain = { + atom = "solute_heavy_atom" + force_constant = 50.0 + } +} + +simulate { + title = "NVT, T = 10 K, small timesteps, and restraints on solute heavy atoms, 12ps" + annealing = off + time = 12 + timestep = [0.001 0.001 0.003] + temperature = 10.0 + restrain = { atom = solute_heavy_atom force_constant = 50.0 } + ensemble = { + class = NVT + method = Langevin + thermostat.tau = 0.1 + } + + randomize_velocity.interval = 1.0 + eneseq.interval = 0.3 + trajectory.center = [] +} + +simulate { + title = "NPT, T = 10 K, and restraints on solute heavy atoms, 12ps" + annealing = off + time = 12 + temperature = 10.0 + restrain = retain + ensemble = { + class = NPT + method = Langevin + thermostat.tau = 0.1 + barostat .tau = 50.0 + } + + randomize_velocity.interval = 1.0 + eneseq.interval = 0.3 + trajectory.center = [] +} + +simulate { + title = "NPT and restraints on solute heavy atoms, 12ps" + effect_if = [["@*.*.annealing"] 'annealing = off temperature = "@*.*.temperature[0][0]"'] + time = 12 + restrain = retain + ensemble = { + class = NPT + method = Langevin + thermostat.tau = 0.1 + barostat .tau = 50.0 + } + + randomize_velocity.interval = 1.0 + eneseq.interval 
= 0.3 + trajectory.center = [] +} + +simulate { + title = "NPT and no restraints, 24ps" + effect_if = [["@*.*.annealing"] 'annealing = off temperature = "@*.*.temperature[0][0]"'] + time = 24 + ensemble = { + class = NPT + method = Langevin + thermostat.tau = 0.1 + barostat .tau = 2.0 + } + + eneseq.interval = 0.3 + trajectory.center = solute +} + +simulate { + cfg_file = "prod.cfg" + jobname = "$MASTERJOBNAME" + dir = "." + compress = "" +} \ No newline at end of file diff --git a/icolos/config/logging/debug.json b/icolos/config/logging/debug.json new file mode 100644 index 0000000..641ed79 --- /dev/null +++ b/icolos/config/logging/debug.json @@ -0,0 +1,75 @@ +{ + "version": 1, + "disable_existing_loggers": false, + "formatters": { + "standard": { + "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s", + "datefmt": "%Y-%m-%d %H:%M:%S" + }, + "blank": { + "format": "%(message)s" + } + }, + + "handlers": { + "console": { + "class": "logging.StreamHandler", + "level": "DEBUG", + "formatter": "standard", + "stream": "ext://sys.stderr" + }, + + "file_handler": { + "class": "logging.handlers.RotatingFileHandler", + "level": "DEBUG", + "formatter": "standard", + "filename": "icolos_run.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + }, + + "file_handler_blank": { + "class": "logging.handlers.RotatingFileHandler", + "level": "DEBUG", + "formatter": "blank", + "filename": "icolos_run.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + } + }, + + "loggers": { + "command_line_interface": { + "level": "DEBUG", + "handlers": ["file_handler"], + "propagate": false + }, + "target_preparation": { + "level": "DEBUG", + "handlers": ["file_handler"], + "propagate": false + }, + "ligand_preparation": { + "level": "DEBUG", + "handlers": ["file_handler"], + "propagate": false + }, + "docking": { + "level": "DEBUG", + "handlers": ["file_handler"], + "propagate": false + }, + "blank": { + "level": "DEBUG", + "handlers": ["file_handler_blank"], + "propagate": false + } + }, + + "root": { + "level": "DEBUG", + "handlers": ["file_handler"] + } +} diff --git a/icolos/config/logging/default.json b/icolos/config/logging/default.json new file mode 100644 index 0000000..7627e0a --- /dev/null +++ b/icolos/config/logging/default.json @@ -0,0 +1,75 @@ +{ + "version": 1, + "disable_existing_loggers": false, + "formatters": { + "standard": { + "format": "%(asctime)s - %(message)s", + "datefmt": "%Y-%m-%d %H:%M:%S" + }, + "blank": { + "format": "%(message)s" + } + }, + + "handlers": { + "console": { + "class": "logging.StreamHandler", + "level": "INFO", + "formatter": "standard", + "stream": "ext://sys.stderr" + }, + + "file_handler": { + "class": "logging.handlers.RotatingFileHandler", + "level": "INFO", + "formatter": "standard", + "filename": "icolos_run.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + }, + + "file_handler_blank": { + "class": "logging.handlers.RotatingFileHandler", + "level": "INFO", + "formatter": "blank", + "filename": "icolos_run.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + } + }, + + "loggers": { + "command_line_interface": { + "level": "INFO", + "handlers": ["file_handler"], + "propagate": false + }, + "target_preparation": { + "level": "INFO", + "handlers": ["file_handler"], + "propagate": false + }, + "ligand_preparation": { + "level": "INFO", + "handlers": ["file_handler"], + "propagate": false + }, + "docking": { + "level": "INFO", + "handlers": ["file_handler"], + "propagate": false 
+ }, + "blank": { + "level": "INFO", + "handlers": ["file_handler_blank"], + "propagate": false + } + }, + + "root": { + "level": "INFO", + "handlers": ["file_handler"] + } +} diff --git a/icolos/config/logging/tutorial.json b/icolos/config/logging/tutorial.json new file mode 100644 index 0000000..1b7ceb5 --- /dev/null +++ b/icolos/config/logging/tutorial.json @@ -0,0 +1,75 @@ +{ + "version": 1, + "disable_existing_loggers": false, + "formatters": { + "standard": { + "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s", + "datefmt": "%Y-%m-%d %H:%M:%S" + }, + "blank": { + "format": "%(message)s" + } + }, + + "handlers": { + "console": { + "class": "logging.StreamHandler", + "level": "DEBUG", + "formatter": "standard", + "stream": "ext://sys.stdout" + }, + + "file_handler": { + "class": "logging.handlers.RotatingFileHandler", + "level": "DEBUG", + "formatter": "standard", + "filename": "icolos_run.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + }, + + "file_handler_blank": { + "class": "logging.handlers.RotatingFileHandler", + "level": "DEBUG", + "formatter": "blank", + "filename": "icolos_run.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + } + }, + + "loggers": { + "command_line_interface": { + "level": "DEBUG", + "handlers": ["file_handler"], + "propagate": false + }, + "target_preparation": { + "level": "DEBUG", + "handlers": ["file_handler"], + "propagate": false + }, + "ligand_preparation": { + "level": "DEBUG", + "handlers": ["file_handler"], + "propagate": false + }, + "docking": { + "level": "DEBUG", + "handlers": ["file_handler"], + "propagate": false + }, + "blank": { + "level": "DEBUG", + "handlers": ["file_handler_blank"], + "propagate": false + } + }, + + "root": { + "level": "DEBUG", + "handlers": ["file_handler"] + } +} diff --git a/icolos/config/logging/verbose.json b/icolos/config/logging/verbose.json new file mode 100644 index 0000000..7627e0a --- /dev/null +++ b/icolos/config/logging/verbose.json @@ -0,0 +1,75 @@ +{ + "version": 1, + "disable_existing_loggers": false, + "formatters": { + "standard": { + "format": "%(asctime)s - %(message)s", + "datefmt": "%Y-%m-%d %H:%M:%S" + }, + "blank": { + "format": "%(message)s" + } + }, + + "handlers": { + "console": { + "class": "logging.StreamHandler", + "level": "INFO", + "formatter": "standard", + "stream": "ext://sys.stderr" + }, + + "file_handler": { + "class": "logging.handlers.RotatingFileHandler", + "level": "INFO", + "formatter": "standard", + "filename": "icolos_run.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + }, + + "file_handler_blank": { + "class": "logging.handlers.RotatingFileHandler", + "level": "INFO", + "formatter": "blank", + "filename": "icolos_run.log", + "maxBytes": 10485760, + "backupCount": 20, + "encoding": "utf8" + } + }, + + "loggers": { + "command_line_interface": { + "level": "INFO", + "handlers": ["file_handler"], + "propagate": false + }, + "target_preparation": { + "level": "INFO", + "handlers": ["file_handler"], + "propagate": false + }, + "ligand_preparation": { + "level": "INFO", + "handlers": ["file_handler"], + "propagate": false + }, + "docking": { + "level": "INFO", + "handlers": ["file_handler"], + "propagate": false + }, + "blank": { + "level": "INFO", + "handlers": ["file_handler_blank"], + "propagate": false + } + }, + + "root": { + "level": "INFO", + "handlers": ["file_handler"] + } +} diff --git a/icolos/config/panther/default_panther.in b/icolos/config/panther/default_panther.in new file mode 
100644 index 0000000..28a35c1 --- /dev/null +++ b/icolos/config/panther/default_panther.in @@ -0,0 +1,74 @@ +# Please cite: Niinivehmas et al. (2015) J. Compt. Aided. Mol. Design. 29(10), 989-1006. + +# Input and output settings: +1-Pdb file (pfil):: {pdb_file} +2-Radius library (rlib):: rad.lib +3-Angle library (alib):: angles.lib +4-Charge library file (chlib):: charges.lib + +# Cavity location: +5-Center (s) (cent):: 24.42 21.79 18.26 +6-Radius center algorithm (radc):: 0.00 +7-Basic multipoint (bmp):: null + +# Pocket filling: +8-Filler radius (frad):: 0.85 + +9-Box radius (brad):: 24.0 +10-Box center (bcen):: null +11-Multibox (mbox):: Y + +#_# Identification settings: #_# +12-Not empty (nem):: HOH FAD NAP WAT NDP NAI NAD FDA +13-Force lining (flin):: null +14-Ignore lining (ilin):: null + +#_# Pocket tweaks: #_# +15-Add oxygen at HEM Fe / dual mode (OFed):: Y Y +16-Charge radius (chrad):: 0.00 +17-Lowest significant charge (+/-) (lowch):: 0.16 +18-Use waters as polar groups (watpol):: 2 +# - Exclusion settings: +19-Delete farther than (del):: 4.5 +20-ligand distance limit (ldlim):: X-0 2.0 +21-False connection angles (fcang):: 180 90 +22-False connection group size (fcgrp):: 200 +23-Exclusion zone (ezon):: null +24-Angle exclusion (aex):: null +25-plane-exclusion (pex):: null +26-Force plane exclusion center (fpec):: null + +# - Inclusion settings: +27-Global keep anyway radius (gkar):: 0.00 +28-Keep anyway radius (kar):: 7.00 +29-AA limit (aalim):: 0 +30-Specific limits (slim):: null + +# - Other settings: +31-Secondary (sec):: N +32-Cofactor fill (cofil):: N + +#_# Rather advanced pocket tweaks. #_# +33-Packing method (pack):: bcc +34-Creep radius (creep):: null +35-Full lining (fulli):: Y +36-Adjacent lining (adjli):: N +37-multibounds (mbo):: Y +38-Max distance of charged atoms (chdist):: 1.6 +39-Agonist-distance (agdist):: 2.5 + +#_# Various constants. #_# +40-Angle tolerance (atol):: 30 +41-Resolution Tolerance (retol):: 0.2 +42-Adjacent distance (adjdist):: 3 +43-Boundary increment (boinc):: 1.3 +44-lining id angle (lidang):: 35 + +# Misc & experimental +45-Radius for charged atoms (chatrad):: 0.5 +46-Exclusion distance for charged atoms and their residues (radexdres):: 0.6 +47-H-bond distance (hbdist):: 1.7 +48-donor addition (donads):: 1.0 +49-h-bond max distance (hobomax):: 4.2 + + diff --git a/icolos/config/unit_tests_config/cosmo/cosmo.config b/icolos/config/unit_tests_config/cosmo/cosmo.config new file mode 100644 index 0000000..7dd7131 --- /dev/null +++ b/icolos/config/unit_tests_config/cosmo/cosmo.config @@ -0,0 +1,28 @@ +ctd = BP_TZVPD_FINE_19.ctd cdir = "/opt/scp/software/COSMOtherm/19.0.4/COSMOthermX/../COSMOtherm/CTDATA-FILES" ldir = "/opt/scp/software/COSMOtherm/19.0.4/COSMOthermX/../licensefiles" +unit notempty wtln ehfile +!! generated by COSMOthermX !! 
+f = mol.cosmo +f = "h2o_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/h" VPfile +f = "methanol_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/m" VPfile +f = "1-octanol_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" Comp = "1-octanol" [ VPfile +f = "1-octanol_c1.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c2.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c3.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c4.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c5.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c6.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" ] +f = "dimethylsulfoxide_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/d" VPfile +f = "cyclohexane_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/c" VPfile +f = "chcl3_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/c" VPfile +f = "propanone_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/p" VPfile +f = "acetonitrile_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/a" VPfile +f = "thf_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/t" VPfile +henry= 2 tc=25.0 GSOLV +henry= 3 tc=25.0 GSOLV +henry= 4 tc=25.0 GSOLV +henry= 5 tc=25.0 GSOLV +henry= 6 tc=25.0 GSOLV +henry= 7 tc=25.0 GSOLV +henry= 8 tc=25.0 GSOLV +henry= 9 tc=25.0 GSOLV +henry= 10 tc=25.0 GSOLV \ No newline at end of file diff --git a/icolos/config/unit_tests_config/cosmo/example.cosmo.config b/icolos/config/unit_tests_config/cosmo/example.cosmo.config new file mode 100644 index 0000000..7dd7131 --- /dev/null +++ b/icolos/config/unit_tests_config/cosmo/example.cosmo.config @@ -0,0 +1,28 @@ +ctd = BP_TZVPD_FINE_19.ctd cdir = "/opt/scp/software/COSMOtherm/19.0.4/COSMOthermX/../COSMOtherm/CTDATA-FILES" ldir = "/opt/scp/software/COSMOtherm/19.0.4/COSMOthermX/../licensefiles" +unit notempty wtln ehfile +!! generated by COSMOthermX !! 
+f = mol.cosmo +f = "h2o_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/h" VPfile +f = "methanol_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/m" VPfile +f = "1-octanol_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" Comp = "1-octanol" [ VPfile +f = "1-octanol_c1.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c2.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c3.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c4.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c5.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" +f = "1-octanol_c6.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" ] +f = "dimethylsulfoxide_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/d" VPfile +f = "cyclohexane_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/c" VPfile +f = "chcl3_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/c" VPfile +f = "propanone_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/p" VPfile +f = "acetonitrile_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/a" VPfile +f = "thf_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/19.0.4/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/t" VPfile +henry= 2 tc=25.0 GSOLV +henry= 3 tc=25.0 GSOLV +henry= 4 tc=25.0 GSOLV +henry= 5 tc=25.0 GSOLV +henry= 6 tc=25.0 GSOLV +henry= 7 tc=25.0 GSOLV +henry= 8 tc=25.0 GSOLV +henry= 9 tc=25.0 GSOLV +henry= 10 tc=25.0 GSOLV \ No newline at end of file diff --git a/icolos/config/unit_tests_config/example.config.json b/icolos/config/unit_tests_config/example.config.json new file mode 100644 index 0000000..cd6bf11 --- /dev/null +++ b/icolos/config/unit_tests_config/example.config.json @@ -0,0 +1,30 @@ +{ + "ICOLOS_TEST_DATA": "../../../IcolosData", + "OE_LICENSE": "/opt/scp/software/oelicense/1.0/oe_license.seq1", + "CREST_BINARY_LOCATION": "/projects/cc/mai/binaries", + "XTBHOME": "/projects/cc/mai/binaries/xtb-6.4.0", + "XTBPATH": "${XTBHOME}/share/xtb", + "TURBOMOLE_CONFIG": "/projects/cc/mai/material/Icolos/turbomole_config", + "ACPYPE": "/projects/cc/mai/binaries/acpype", + "PATH": "${PATH}:${XTBHOME}", + "PKG_CONFIG_PATH": "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig", + "PARNODES": "2", + "PARA_ARCH": "MPI", + "PANTHER_LOCATION": "/projects/cc/mai/binaries/panther", + "SHAEP_LOCATION": "/projects/cc/mai/binaries", + "FORCEFIELD": "/projects/cc/mai/material/Icolos/forcefields/charmm36-feb2021.ff", + "OPTUNA_AZ": { + "ENVIRONMENT_PYTHON": "/miniconda3/envs/Optuna_AZ/bin/python", + "ENTRY_POINT_LOCATION": "/OptunaAZ" + }, + "PMX": { + "PMX": "/envs/pmx/bin/pmx", + "PMX_PYTHON": "/envs/pmx/bin/python", + "CLI_ENTRYPOINT": "/pmx_az/src/pmx/scripts/mai_utils", + "GMXLIB": "/px_az/src/pmx/data/mutff" + }, + "DSSP": "/opt/scp/software/DSSP/4.0.0-GCCcore-8.2.0/bin", + "MINICONDA_BASE": ". 
//etc/profile.d/conda.sh", + "OPENMM_FORCEFIELDS": "/projects/cc/mai/material/Icolos/openmmforcefields", + "MDPLOT": "/projects/cc/mai/binaries/MDplot" +} \ No newline at end of file diff --git a/icolos/config_containers/__init__.py b/icolos/config_containers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/config_containers/container.py b/icolos/config_containers/container.py new file mode 100644 index 0000000..dcc4d44 --- /dev/null +++ b/icolos/config_containers/container.py @@ -0,0 +1,33 @@ +import abc +import json +import os + + +class ConfContainer(object, metaclass=abc.ABCMeta): + @abc.abstractmethod + def __init__(self, conf): + # get instance of configuration enum and load configuration + # parameter "config" can be a string, a path or a dictionary (as long as it holds valid JSON input) + if isinstance(conf, str): + if os.path.isfile(conf): + with open(conf) as file: + conf = file.read().replace("\r", "").replace("\n", "") + conf = json.loads(conf) + self._conf = conf + + def get_as_dict(self): + return self._conf + + def get(self, key, default=None): + return self._conf.get(key, default) + + def __getitem__(self, item): + return self.get_as_dict()[item] + + def get_as_string(self): + return json.dumps(self._conf) + + def validate(self): + raise NotImplementedError( + "This functions needs to be implemented by child classes." + ) diff --git a/icolos/config_containers/workflow_container.py b/icolos/config_containers/workflow_container.py new file mode 100644 index 0000000..8f8e374 --- /dev/null +++ b/icolos/config_containers/workflow_container.py @@ -0,0 +1,13 @@ +from icolos.config_containers.container import ConfContainer + + +class WorkflowContainer(ConfContainer): + def __init__(self, conf, validation=True): + super().__init__(conf=conf) + + # TODO: include validation with JSON Schema + if validation: + self.validate() + + def validate(self): + pass diff --git a/icolos/core/__init__.py b/icolos/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/composite_agents/__init__.py b/icolos/core/composite_agents/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/composite_agents/base_agent.py b/icolos/core/composite_agents/base_agent.py new file mode 100644 index 0000000..c82a56c --- /dev/null +++ b/icolos/core/composite_agents/base_agent.py @@ -0,0 +1,108 @@ +import os +from abc import abstractmethod +from copy import deepcopy +from typing import Dict, List + +from pydantic import BaseModel, PrivateAttr + +from icolos.loggers.agentlogger import AgentLogger + +from icolos.utils.enums.logging_enums import LoggingConfigEnum +from icolos.utils.enums.composite_agents_enums import WorkflowEnum + +_WE = WorkflowEnum() +_LE = LoggingConfigEnum() + + +class AgentEnvironmentParameters(BaseModel): + class WorkflowExportParameters(BaseModel): + key: str + value: str + + export: List[WorkflowExportParameters] = [] + + +class AgentHeaderParametersSettings(BaseModel): + remove_temporary_files: bool = True + single_directory: bool = False + + +class AgentHeaderParameters(BaseModel): + class AgentLoggingParameters(BaseModel): + logfile: str = None + + id: str = None + description: str = None + logging: AgentLoggingParameters = AgentLoggingParameters() + environment: AgentEnvironmentParameters = None + global_variables: Dict = None + global_settings: AgentHeaderParametersSettings = AgentHeaderParametersSettings() + + +class BaseAgent(BaseModel): + + # should also work without parsing the base specification here, 
+
+
+class BaseAgent(BaseModel):
+
+    # this should also work without parsing the base specification here, but then
+    # IDEs will not pick up the attributes defined below
+    header: AgentHeaderParameters = AgentHeaderParameters()
+
+    class Config:
+        underscore_attrs_are_private = True
+
+    _logger = PrivateAttr()
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        self._logger = AgentLogger()
+
+    def _export_env_variables(self):
+        for var in self.header.environment.export:
+            key = str(var.key)
+            value = os.path.expandvars(str(var.value))
+            os.environ[key] = value
+            self._logger.log(f"Exported variable {key} with value {value}.", _LE.DEBUG)
+
+    def initialize(self):
+        self._export_env_variables()
+
+    def _nested_update(self, inp, pattern: str, replacement: str):
+        if isinstance(inp, dict):
+            items = inp.items()
+        elif isinstance(inp, (list, tuple)):
+            items = enumerate(inp)
+        elif isinstance(inp, str):
+            return inp.replace(pattern, replacement)
+        else:
+            return inp
+
+        for key, value in items:
+            inp[key] = self._nested_update(value, pattern, replacement)
+        return inp
+
+    def _update_global_variables(self, conf: dict) -> dict:
+        conf = deepcopy(conf)
+        if self.header.global_variables is not None:
+            for key, value in self.header.global_variables.items():
+                pattern = "{" + key + "}"
+                self._nested_update(inp=conf, pattern=pattern, replacement=value)
+                self._logger.log(
+                    f"Updated global variable {key} with value {value}.", _LE.DEBUG
+                )
+        return conf
+
+    @abstractmethod
+    def execute(self):
+        raise NotImplementedError
+
+    def is_valid(self) -> bool:
+        raise NotImplementedError
+
+    def set_id(self, id: str):
+        self.header.id = id
+
+    def get_id(self) -> str:
+        return self.header.id
+
+    def set_description(self, description: str):
+        self.header.description = description
+
+    def get_description(self) -> str:
+        return self.header.description
diff --git a/icolos/core/composite_agents/scheduler.py b/icolos/core/composite_agents/scheduler.py
new file mode 100644
index 0000000..41b4f57
--- /dev/null
+++ b/icolos/core/composite_agents/scheduler.py
@@ -0,0 +1,54 @@
+from pydantic import BaseModel, PrivateAttr
+
+from icolos.core.composite_agents.base_agent import BaseAgent, AgentHeaderParameters
+
+from icolos.utils.enums.logging_enums import LoggingConfigEnum
+from icolos.utils.enums.composite_agents_enums import SchedulerEnum
+
+_SE = SchedulerEnum()
+_LE = LoggingConfigEnum()
+
+
+class SchedulerHeaderParameters(AgentHeaderParameters, BaseModel):
+    pass
+
+
+class Scheduler(BaseAgent, BaseModel):
+    """Class to hold the whole logic for scheduling sub-jobs."""
+
+    header: SchedulerHeaderParameters = SchedulerHeaderParameters()
+
+    class Config:
+        underscore_attrs_are_private = True
+
+    _logger = PrivateAttr()
+    _initialized_steps = PrivateAttr()
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+    def initialize(self):
+        super().initialize()
+
+    def execute(self):
+        # TODO: implement
+        pass
+
+    def _action_prepare(self):
+        pass
+
+    def _action_run(self):
+        pass
+
+    def is_valid(self) -> bool:
+        # TODO: implement
+        pass
+
+    def __repr__(self):
+        return "<Icolos scheduler: id=%s, description=%s>" % (
+            self.get_id(),
+            self.get_description(),
+        )
+
+    def __str__(self):
+        return self.__repr__()
diff --git a/icolos/core/composite_agents/workflow.py b/icolos/core/composite_agents/workflow.py
new file mode 100644
index 0000000..515e6c0
--- /dev/null
+++ b/icolos/core/composite_agents/workflow.py
@@ -0,0 +1,143 @@
+from typing import Dict, List
+
+from pydantic import BaseModel, PrivateAttr
+from icolos.core.containers.perturbation_map import PerturbationMap
+from icolos.core.flow_control.flow_control import FlowControlBase
+from icolos.core.job_control.job_control import StepJobControl
+
+from icolos.core.steps_utils import initialize_step_from_dict
+from icolos.core.workflow_steps.step import StepBase
+from icolos.core.composite_agents.base_agent import BaseAgent, AgentHeaderParameters
+from icolos.utils.enums.step_enums import StepBaseEnum
+
+from icolos.utils.general.icolos_exceptions import get_exception_message
+
+from icolos.utils.enums.logging_enums import LoggingConfigEnum
+from icolos.utils.enums.composite_agents_enums import WorkflowEnum
+
+_WE = WorkflowEnum()
+_LE = LoggingConfigEnum()
+_SBE = StepBaseEnum
+
+
+class WorkflowHeaderParameters(AgentHeaderParameters, BaseModel):
+    pass
+
+
+class WorkflowData(BaseModel):
+    work_dir: str = None
+    perturbation_map: PerturbationMap = None
+
+
+class WorkFlow(BaseAgent, BaseModel):
+    """Class to hold the whole logic for a workflow."""
+
+    steps: List[Dict] = []
+    header: WorkflowHeaderParameters = WorkflowHeaderParameters()
+    workflow_data: WorkflowData = WorkflowData()
+
+    class Config:
+        underscore_attrs_are_private = True
+
+    _logger = PrivateAttr()
+    _initialized_steps = PrivateAttr()
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        self._initialized_steps = []
+
+    def initialize(self):
+        super().initialize()
+        self._initialized_steps = []
+        for step_conf in self.steps:
+            step_conf = self._update_global_variables(conf=step_conf)
+            step = initialize_step_from_dict(step_conf=step_conf)
+            if isinstance(step, StepBase):
+                # we have a normal step, no flow control wrapping
+                step.set_workflow_object(self)
+                self._initialized_steps.append(step)
+            elif isinstance(step, FlowControlBase):
+                # flow control has returned a list of steps, or a single JobControl step
+                if isinstance(step.initialized_steps, list):
+                    for st in step.initialized_steps:
+                        st.set_workflow_object(self)
+                        self._initialized_steps.append(st)
+                elif isinstance(step.initialized_steps, StepJobControl):
+                    # parallelize was set, so a JobControl wrapper is returned; attach
+                    # the workflow object to each of the wrapped steps
+                    for st in step.initialized_steps.initialized_steps:
+                        st.set_workflow_object(self)
+                    self._initialized_steps.append(step.initialized_steps)
+        self._logger.log(
+            f"Initialized {len(self._initialized_steps)} steps in workflow {self.header.id}.",
+            _LE.DEBUG,
+        )
+
+    def execute(self):
+        for step in self._initialized_steps:
+            step.generate_input()
+            self._logger.log(f"Starting execution of step: {step.step_id}", _LE.INFO)
+            step.execute()
+            self._logger.log(
+                f"Processing write-out blocks for {step.step_id}.", _LE.DEBUG
+            )
+            step.process_write_out()
+        self._logger.log(
+            f"Execution of {len(self._initialized_steps)} steps completed.", _LE.INFO
+        )
+
+    def is_valid(self) -> bool:
+        try:
+            for step in self._initialized_steps:
+                step.validate()
+        except Exception as e:
+            self._logger.log(
+                f'During step validation, "WorkFlow" returned the following exception: {get_exception_message(e)}.',
+                _LE.WARNING,
+            )
+            return False
+        return True
+
+    def add_step(self, step: StepBase):
+        self._initialized_steps.append(step)
+
+    def get_steps(self) -> list:
+        return self._initialized_steps
+
+    def find_step_by_step_id(self, step_id: str):
+        for step in self._initialized_steps:
+            if step.step_id == step_id:
+                return step
+            elif step.type == _SBE.STEP_JOB_CONTROL:
+                # the steps themselves are buried in the _initialized_steps
+                # attribute of the JobControl wrapper
+                for st in step.initialized_steps:
+                    if st.step_id == step_id:
+                        return st
+
+        raise IndexError(f"Could not find step with step_id {step_id} in workflow.")
+
+    def __iter__(self):
+        return iter(self.steps)
+
+    def __repr__(self):
+        return "<Icolos workflow: id=%s, description=%s, number of steps: %d>" % (
+            self.get_id(),
+            self.get_description(),
+            len(self),
+        )
+
+    def set_perturbation_map(self, p_map: PerturbationMap) -> None:
+        self.workflow_data.perturbation_map = p_map
+
+    def get_perturbation_map(self) -> PerturbationMap:
+        return self.workflow_data.perturbation_map
+
+    def __str__(self):
+        return self.__repr__()
+
+    def __getitem__(self, key: int):
+        return self._initialized_steps[key]
+
+    def __len__(self) -> int:
+        return len(self._initialized_steps)
diff --git a/icolos/core/containers/__init__.py b/icolos/core/containers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/icolos/core/containers/compound.py b/icolos/core/containers/compound.py
new file mode 100644
index 0000000..b36fc2d
--- /dev/null
+++ b/icolos/core/containers/compound.py
@@ -0,0 +1,549 @@
+from copy import deepcopy
+from typing import List
+from rdkit import Chem
+
+from icolos.utils.enums.compound_enums import (
+    CompoundContainerEnum,
+    EnumerationContainerEnum,
+)
+from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum
+from icolos.core.step_utils.structconvert import StructConvert
+from icolos.utils.general.icolos_exceptions import ContainerCorrupted
+from icolos.utils.enums.write_out_enums import WriteOutEnum
+from typing import Union
+import numpy as np
+import os
+
+_WE = WriteOutEnum()
+_SEE = SchrodingerExecutablesEnum()
+
+
+class Conformer:
+    """This class is a storage class for individual conformers associated with a given Enumeration."""
+
+    def __init__(
+        self,
+        conformer: Chem.Mol = None,
+        conformer_id: int = None,
+        enumeration_object=None,
+    ):
+        self._conformer = conformer
+        self._conformer_id = conformer_id
+        self._enumeration_object = enumeration_object
+        self._extra_data_dictionary = {}
+
+    def get_compound_name(self) -> str:
+        if self.get_enumeration_object() is not None:
+            return self.get_enumeration_object().get_compound_name()
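+
+    # Note: index strings take the form "<compound>:<enumeration>:<conformer>"
+    # (e.g. "0:1:2" for conformer 2 of enumeration 1 of compound 0); see
+    # get_index_string() below and its counterparts on Enumeration and Compound.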
+    def get_index_string(self) -> str:
+        enum_obj = self.get_enumeration_object()
+        enum_str = ""
+        if enum_obj is not None:
+            enum_str = enum_obj.get_index_string()
+        conf_str = ""
+        if self.get_conformer_id() is not None:
+            conf_str = str(self.get_conformer_id())
+        return ":".join([enum_str, conf_str])
+
+    def add_extra_data(self, key: str, data):
+        self._extra_data_dictionary[key] = data
+
+    def get_extra_data(self) -> dict:
+        return self._extra_data_dictionary
+
+    def clear_extra_data(self):
+        self._extra_data_dictionary = {}
+
+    def set_enumeration_object(self, enumeration_object):
+        self._enumeration_object = enumeration_object
+
+    def get_enumeration_object(self):
+        return self._enumeration_object
+
+    def get_molecule(self) -> Chem.Mol:
+        return self._conformer
+
+    def set_molecule(self, conformer: Chem.Mol):
+        self._conformer = conformer
+
+    def set_conformer_id(self, conformer_id: int):
+        self._conformer_id = conformer_id
+
+    def get_conformer_id(self) -> int:
+        return self._conformer_id
+
+    def empty(self) -> bool:
+        if self.get_molecule() is None:
+            return True
+        return False
+
+    def _clone(self):
+        clone = Conformer(
+            conformer=deepcopy(self.get_molecule()),
+            conformer_id=self.get_conformer_id(),
+            enumeration_object=self.get_enumeration_object(),
+        )
+        clone._extra_data_dictionary = deepcopy(self.get_extra_data())
+        return clone
+
+    def __copy__(self):
+        return self._clone()
+
+    def __deepcopy__(self, memo):
+        return self._clone()
+
+    def __repr__(self):
+        parent_enumeration_id = (
+            None
+            if self.get_enumeration_object() is None
+            else self.get_enumeration_object().get_enumeration_id()
+        )
+        return "<Conformer id: %s, linked enumeration: %s>" % (
+            self.get_conformer_id(),
+            parent_enumeration_id,
+        )
+
+    def __str__(self):
+        return self.__repr__()
+
+    def write(self, path: str, format_=_WE.SDF):
+        writer = Chem.SDWriter(path)
+        molecule = self.get_molecule()
+        molecule.SetProp(_WE.RDKIT_NAME, self.get_index_string())
+        molecule.SetProp(_WE.INDEX_STRING, self.get_index_string())
+        writer.write(molecule)
+        writer.close()
+        if format_ == _WE.PDB:
+            pdb_path = path.split(".")[0] + ".pdb"
+            # convert the written SDF file to a PDB with the Schrodinger converter
+            converter = StructConvert(prefix_execution=_SEE.SCHRODINGER_MODULE)
+            converter.sdf2pdb(sdf_file=path, pdb_file=pdb_path)
+            os.remove(path)
+
+    def update_coordinates(self, path: str):
+        old = self.get_molecule()
+        for mol in Chem.SDMolSupplier(path, removeHs=False):
+            mol.SetProp(_WE.RDKIT_NAME, old.GetProp(_WE.RDKIT_NAME))
+            for prop in old.GetPropNames():
+                mol.SetProp(prop, old.GetProp(prop))
+            self.set_molecule(mol)
+
+            # only one molecule is expected at this stage, so stop after the first one
+            break
+        self.write("".join([path, "_out"]))
+
+
+class Enumeration:
+    """This class bundles all information on an enumeration, especially all conformers generated."""
+
+    def __init__(
+        self,
+        compound_object=None,
+        smile: str = "",
+        molecule: Chem.Mol = None,
+        original_smile: str = None,
+        enumeration_id: int = None,
+    ):
+        self._MC = CompoundContainerEnum()
+        self._EC = EnumerationContainerEnum()
+        self._smile = smile
+        self._compound_object = compound_object
+        self._molecule = molecule
+        self._original_smile = original_smile
+        self._enumeration_id = enumeration_id
+        self._conformers = []
+
+    def empty(self) -> bool:
+        if len(self.get_conformers()) == 0:
+            return True
+        return False
+
+    def get_compound_name(self) -> str:
+        if self.get_compound_object() is not None:
+            return self.get_compound_object().get_name()
+    def _get_next_conformer_id(self) -> int:
+        ids = [conf.get_conformer_id() for conf in self.get_conformers()]
+        if len(ids) == 0:
+            return 0
+        else:
+            return max(ids) + 1
+
+    def sort_conformers(
+        self, by_tag: Union[str, List[str]], reverse: bool = True, aggregation="sum"
+    ):
+        conformers = self.get_conformers()
+        if isinstance(by_tag, str):
+            conformers = sorted(
+                conformers,
+                key=lambda x: float(x.get_molecule().GetProp(by_tag)),
+                reverse=reverse,
+            )
+            self._conformers = conformers
+            self.reset_conformer_ids()
+        elif isinstance(by_tag, list):
+            # the values need to be normalised: calculate the max and min of each tag in the series
+            def normalise_tag(value, tag):
+                all_tag_values = [
+                    float(conf.get_molecule().GetProp(tag)) for conf in conformers
+                ]
+                max_tag = np.max(all_tag_values)
+                min_tag = np.min(all_tag_values)
+                return (float(value) - min_tag) / (max_tag - min_tag)
+
+            # if multiple tags are specified, aggregate according to the provided aggregation function
+            if aggregation == "sum":
+                conformers = sorted(
+                    conformers,
+                    key=lambda x: np.sum(
+                        [
+                            float(normalise_tag(x.get_molecule().GetProp(i), i))
+                            for i in by_tag
+                        ]
+                    ),
+                    reverse=reverse,
+                )
+                self._conformers = conformers
+            elif aggregation == "product":
+                conformers = sorted(
+                    conformers,
+                    key=lambda x: np.prod(
+                        [
+                            float(normalise_tag(x.get_molecule().GetProp(i), i))
+                            for i in by_tag
+                        ]
+                    ),
+                    reverse=reverse,
+                )
+                self._conformers = conformers
+            else:
+                raise AttributeError(
+                    "Only sum or product aggregation modes are currently supported - ABORT"
+                )
+
+    def find_conformer(self, conformer_id: int) -> Conformer:
+        conf = [
+            conf
+            for conf in self.get_conformers()
+            if conf.get_conformer_id() == conformer_id
+        ]
+        if len(conf) == 0:
+            raise IndexError(f"Could not find conformer with id {conformer_id}.")
+        elif len(conf) > 1:
+            raise ContainerCorrupted(
+                f"More than one conformer with id {conformer_id} found in the same Enumeration instance (enumeration_id: {self.get_enumeration_id()})."
+            )
+        return conf[0]
+
+    def get_conformer_ids(self) -> List[int]:
+        ids = [conf.get_conformer_id() for conf in self.get_conformers()]
+        return ids
+
+    def reset_conformer_ids(self):
+        for new_id, conf in enumerate(self.get_conformers()):
+            conf.set_conformer_id(conformer_id=new_id)
+
+    def add_conformer(self, conformer: Conformer, auto_update: bool = True):
+        """Add a new conformer. If "auto_update" is True, the enumeration object of the
+        conformer will be set to "self" and the conformer_id will be set to the next free index."""
+        conformer = deepcopy(conformer)
+        if auto_update:
+            conformer.set_enumeration_object(self)
+            conformer.set_conformer_id(self._get_next_conformer_id())
+        self._conformers.append(conformer)
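+
+    # A hypothetical usage sketch for sort_conformers (the tag names are illustrative,
+    # not fixed by this class): sort by two tags, min-max normalised and summed per conformer:
+    #
+    #   enumeration.sort_conformers(
+    #       by_tag=["docking_score", "shape_similarity"],
+    #       reverse=True,
+    #       aggregation="sum",
+    #   )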
If "auto_update" is True, the Enumeration class will be set to "self" and + the conformer_id will be set to the next free index.""" + for conformer in conformers: + self.add_conformer(conformer=conformer, auto_update=auto_update) + + def get_index_string(self) -> str: + comp_obj = self.get_compound_object() + comp_str = "" + if comp_obj is not None: + comp_str = comp_obj.get_index_string() + enum_str = "" + if self.get_enumeration_id() is not None: + enum_str = str(self.get_enumeration_id()) + return ":".join([comp_str, enum_str]) + + def clean_failed_conformers(self): + # all conformers, where the molecule has been set to None by a function can be considered to have failed + for idx in list(reversed(range(len(self._conformers)))): + if self._conformers[idx].get_molecule() is None: + del self._conformers[idx] + self.reset_conformer_ids() + + def clear_molecule(self): + self._molecule = None + + def clear_conformers(self): + self._conformers = [] + + def get_conformers(self) -> List[Conformer]: + return self._conformers + + def clone_conformers(self) -> List[Conformer]: + return [deepcopy(conf) for conf in self._conformers] + + def set_compound_object(self, compound_object): + self._compound_object = compound_object + + def get_compound_object(self): + return self._compound_object + + def set_enumeration_id(self, enumeration_id: int): + self._enumeration_id = enumeration_id + + def get_enumeration_id(self) -> int: + return self._enumeration_id + + def set_smile(self, smile: str): + self._smile = smile + + def get_smile(self) -> str: + return self._smile + + def set_molecule(self, molecule: Chem.Mol): + self._molecule = molecule + + def get_molecule(self) -> Chem.Mol: + return self._molecule + + def set_original_smile(self, original_smile: str): + self._original_smile = original_smile + + def get_original_smile(self) -> str: + return self._original_smile + + def _clone(self): + clone = Enumeration( + compound_object=self.get_compound_object(), + smile=self.get_smile(), + molecule=deepcopy(self.get_molecule()), + original_smile=self.get_original_smile(), + enumeration_id=self.get_enumeration_id(), + ) + for conf in self.get_conformers(): + conf = deepcopy(conf) + conf.set_enumeration_object(enumeration_object=clone) + clone.add_conformer(conf, auto_update=False) + return clone + + def __copy__(self): + return self._clone() + + def __deepcopy__(self, memo): + return self._clone() + + def __repr__(self): + parent_compound_id = ( + None + if self.get_compound_object() is None + else self.get_compound_object().get_compound_number() + ) + return "" % ( + self.get_enumeration_id(), + self.get_smile(), + parent_compound_id, + len(self._conformers), + ) + + def __str__(self): + return self.__repr__() + + def __iter__(self): + return iter(self._conformers) + + def __getitem__(self, key: int) -> Conformer: + return self._conformers[key] + + def __len__(self) -> int: + return len(self.get_conformers()) + + +class Compound: + """This class bundles all information on a molecule and serves mainly to group enumerations.""" + + def __init__(self, name: str = "", compound_number: int = None): + self._CC = CompoundContainerEnum() + self._EC = EnumerationContainerEnum() + self._name = name + self._compound_number = compound_number + self._enumerations = [] + + def __repr__(self): + return "" % ( + self.get_name(), + self.get_compound_number(), + len(self.get_enumerations()), + ) + + def __str__(self): + return self.__repr__() + + def get_index_string(self) -> str: + if self.get_compound_number() is not None: + 
+            return str(self.get_compound_number())
+        else:
+            return ""
+
+    def set_name(self, name: str):
+        self._name = name
+
+    def get_name(self) -> str:
+        return self._name
+
+    def set_compound_number(self, compound_number: int):
+        self._compound_number = compound_number
+
+    def get_compound_number(self) -> int:
+        return self._compound_number
+
+    def add_enumeration(self, enumeration: Enumeration, auto_update: bool = True):
+        """Add a new enumeration. If "auto_update" is True, the compound object of the
+        enumeration will be set to "self" and the enumeration_id will be set to the next free index."""
+        enumeration = deepcopy(enumeration)
+        if auto_update:
+            enumeration.set_compound_object(self)
+            enumeration.set_enumeration_id(self._get_next_enumeration_id())
+        self._enumerations.append(enumeration)
+
+    def add_enumerations(
+        self, enumerations: List[Enumeration], auto_update: bool = True
+    ):
+        """Add new enumerations. If "auto_update" is True, the compound object of each
+        enumeration will be set to "self" and the enumeration_id will be set to the next free index."""
+        for enumeration in enumerations:
+            self.add_enumeration(enumeration=enumeration, auto_update=auto_update)
+
+    def clear_enumerations(self):
+        self._enumerations = []
+
+    def get_enumerations(self) -> List[Enumeration]:
+        return self._enumerations
+
+    def _clone(self):
+        clone = Compound(
+            name=self.get_name(), compound_number=self.get_compound_number()
+        )
+        for enum in self.get_enumerations():
+            enum = deepcopy(enum)
+            enum.set_compound_object(compound_object=clone)
+            clone.add_enumeration(enum, auto_update=False)
+        return clone
+
+    def __iter__(self):
+        return iter(self._enumerations)
+
+    def __copy__(self):
+        return self._clone()
+
+    def __deepcopy__(self, memo):
+        return self._clone()
+
+    def __getitem__(self, key: int) -> Enumeration:
+        return self._enumerations[key]
+
+    def __len__(self) -> int:
+        return len(self.get_enumerations())
+
+    def _get_next_enumeration_id(self):
+        ids = [enum.get_enumeration_id() for enum in self.get_enumerations()]
+        if len(ids) == 0:
+            return 0
+        else:
+            return max(ids) + 1
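+
+    # A hypothetical construction sketch (all values illustrative); "rdkit_mol"
+    # stands in for any rdkit.Chem.Mol instance:
+    #
+    #   compound = Compound(name="mol1", compound_number=0)
+    #   enumeration = Enumeration(smile="CCO")
+    #   enumeration.add_conformer(Conformer(conformer=rdkit_mol))
+    #   compound.add_enumeration(enumeration)
+    #   compound[0][0].get_index_string()  # -> "0:0:0"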
+    def find_enumeration(self, enumeration_id: int) -> Enumeration:
+        enum = [
+            enum
+            for enum in self.get_enumerations()
+            if enum.get_enumeration_id() == enumeration_id
+        ]
+        if len(enum) == 0:
+            raise IndexError(f"Could not find enumeration with id {enumeration_id}.")
+        elif len(enum) > 1:
+            raise ContainerCorrupted(
+                f"More than one enumeration with id {enumeration_id} found in the same Compound instance (compound_number: {self.get_compound_number()})."
+            )
+        return enum[0]
+
+    def get_enumeration_ids(self) -> List[int]:
+        ids = [enum.get_enumeration_id() for enum in self.get_enumerations()]
+        return ids
+
+    def reset_enumeration_ids(self):
+        for new_id, enum in enumerate(self.get_enumerations()):
+            enum.set_enumeration_id(enumeration_id=new_id)
+
+    def reset_all_ids(self):
+        self.reset_enumeration_ids()
+        for enum in self.get_enumerations():
+            enum.reset_conformer_ids()
+
+    def update_all_relations(self):
+        for enum in self.get_enumerations():
+            enum.set_compound_object(self)
+            for conf in enum.get_conformers():
+                conf.set_enumeration_object(enum)
+
+    def empty(self) -> bool:
+        if len(self.get_enumerations()) == 0:
+            return True
+        return False
+
+    def unroll_conformers(self) -> List[Conformer]:
+        conformers = []
+        for enum in self.get_enumerations():
+            # guard against empty enumerations that might be used when constructing more complex data flows
+            if enum.empty():
+                continue
+            for conf in enum.get_conformers():
+                conformers.append(conf)
+        return conformers
+
+
+# TODO: replace these helper functions with a wrapper object
+def get_compound_by_id(compounds: List[Compound], id: int) -> Compound:
+    for compound in compounds:
+        if compound.get_compound_number() == id:
+            return compound
+    raise ValueError(
+        f"Could not find compound with id {id} in list of length {len(compounds)}."
+    )
+
+
+def get_compound_by_name(compounds: List[Compound], name: str) -> Compound:
+    for compound in compounds:
+        if compound.get_name() == name:
+            return compound
+    raise ValueError(
+        f"Could not find compound with name {name} in list of length {len(compounds)}."
+    )
+
+
+def unroll_conformers(compounds: List[Compound]) -> List[Conformer]:
+    all_conformers = []
+    for comp in compounds:
+        all_conformers = all_conformers + comp.unroll_conformers()
+    return all_conformers
+
+
+def unroll_enumerations(compounds: List[Compound]) -> List[Enumeration]:
+    all_enumerations = []
+    for comp in compounds:
+        all_enumerations = all_enumerations + comp.get_enumerations()
+    return all_enumerations
diff --git a/icolos/core/containers/generic.py b/icolos/core/containers/generic.py
new file mode 100644
index 0000000..51bee70
--- /dev/null
+++ b/icolos/core/containers/generic.py
@@ -0,0 +1,210 @@
+from shutil import copyfile
+from distutils.dir_util import copy_tree
+import json
+import os
+import sys
+from typing import Any, List, Dict, Union
+from copy import Error
+
+
+class GenericData:
+    """Container class to hold generic data of any file type"""
+
+    def __init__(
+        self,
+        file_name: str,
+        file_data=None,
+        argument=True,
+        file_id: int = None,
+        extension: str = None,
+    ):
+        self._extension = (
+            extension if extension is not None else file_name.split(".")[-1]
+        )
+        self._file_name = file_name
+        self._file_data = file_data
+        self._file_id = file_id
+        # self._argument: bool = argument
+        self._file_size = self.calculate_file_size()
+
+    def get_file_name(self) -> str:
+        return self._file_name
+
+    def get_data(self) -> Any:
+        return self._file_data
+
+    def calculate_file_size(self):
+        return sys.getsizeof(self._file_data)
+
+    def get_extension(self):
+        return self._extension
+
+    def set_data(self, data):
+        self._file_data = data
+
+    def set_file_name(self, file_name):
+        self._file_name = file_name
+
+    def set_id(self, file_id):
+        self._file_id = file_id
+
+    def get_id(self):
+        return self._file_id
+
+    def set_extension(self, extension):
+        self._extension = extension
+
+    def write(self, path: str, join: bool = True, final_writeout: bool = False):
+        """
+        Handles all I/O operations for generic data; supports handling directories and symlinks.
+        """
+        orig_path = path
+        if join:
+            path = os.path.join(path, self.get_file_name())
+
+        if str(self._file_data).startswith("/"):
+            # file data is a path: copy the file to the destination;
+            # if it is a file, it is stored like this because it is large (> 2GB)
+            if os.path.isfile(self._file_data):
+                if not final_writeout:
+                    # if this is a write-out to a step, we can simply create a symlink
+                    os.symlink(self._file_data, path, target_is_directory=False)
+                else:
+                    # we cannot do this for the final write-out since /scratch or /tmp will eventually get cleaned
+                    copyfile(self._file_data, path)
+
+            elif os.path.isdir(self._file_data):
+                # copy the entire directory to the parent dir
+                copy_tree(self._file_data, orig_path)
+        elif isinstance(self._file_data, list):
+            with open(path, "w") as f:
+                f.writelines(self._file_data)
+
+        elif isinstance(self._file_data, str):
+            with open(path, "w") as f:
+                f.write(self._file_data)
+        elif isinstance(self._file_data, dict):
+            with open(path, "w") as f:
+                f.write(json.dumps(self._file_data))
+        else:
+            with open(path, "wb") as f:
+                f.write(self._file_data)
+
+    def update_data(self, data):
+        if sys.getsizeof(data) != self._file_size:
+            self.set_data(data)
+
+    def __repr__(self):
+        return f"GenericData object - name: {self._file_name}, extension: {self._extension}."
+
+    def __str__(self):
+        return self.__repr__()
+
+
+class GenericContainer:
+    """Container class to hold the instances of the GenericData class, separated by extension"""
+
+    def __init__(self):
+        self._file_dict: Dict[str, List] = {}
+
+    def add_file(self, file: GenericData):
+        ext = file.get_extension()
+        file.set_id(self.get_next_file_id(ext))
+        try:
+            self._file_dict[ext].append(file)
+        except KeyError:
+            self._initialise_list(ext)
+            self._file_dict[ext].append(file)
+
+    def _initialise_list(self, ext):
+        self._file_dict[ext] = []
+
+    def get_next_file_id(self, ext):
+        ids = [file.get_id() for file in self.get_files_by_extension(ext)]
+        if len(ids) == 0:
+            return 0
+        else:
+            return max(ids) + 1
+
+    def get_file_by_index(self, index):
+        for file in self.get_flattened_files():
+            if file.get_id() == index:
+                return file
+
+    def add_files(self, files: List[GenericData]):
+        extensions = list(set([f.get_extension() for f in files]))
+        if len(extensions) > 1:
+            raise Error("Cannot have more than one type of file")
+        else:
+            if extensions[0] in self._file_dict.keys():
+                self._file_dict[extensions[0]].extend(files)
+            else:
+                self._file_dict[extensions[0]] = files
+
+    def get_all_files(self) -> Dict[str, List]:
+        return self._file_dict
+
+    def get_files_by_extension(self, ext: str) -> List[GenericData]:
+        if ext in self._file_dict.keys():
+            return self._file_dict[ext]
+        else:
+            self._initialise_list(ext)
+            return self._file_dict[ext]
+
+    def get_file_names_by_extension(self, ext: str):
+        try:
+            return [f.get_file_name() for f in self._file_dict[ext]]
+        except KeyError:
+            self._initialise_list(ext)
+            return [f.get_file_name() for f in self._file_dict[ext]]
+
+    def get_file_types(self):
+        return self._file_dict.keys()
+
+    def get_flattened_files(self) -> List[GenericData]:
+        rtn_files = []
+        for key in self._file_dict.keys():
+            for file in self._file_dict[key]:
+                rtn_files.append(file)
+        return rtn_files
+
+    def get_file_by_name(self, name):
+        for file in self.get_flattened_files():
+            if file.get_file_name() == name:
+                return file
+
+    def clear_file_dict(self):
+        self._file_dict = {}
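+
+    # A hypothetical usage sketch (file names and contents are illustrative):
+    #
+    #   container = GenericContainer()
+    #   container.add_file(GenericData(file_name="topol.top", file_data="..."))
+    #   container.add_file(GenericData(file_name="conf.gro", file_data="..."))
+    #   container.get_file_names_by_extension("top")  # -> ["topol.top"]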
+    def get_argument_by_extension(
+        self, ext, rtn_file_object=False
+    ) -> Union[GenericData, str]:
+        files = []
+        for file in self.get_flattened_files():
+            if file.get_extension() == ext:
+                files.append(file)
+        try:
+            assert len(files) == 1
+        except AssertionError:
+            print(
+                f"Found multiple files with extension {ext}, select the index of the file to be passed as an argument\n"
+            )
+            print("######################")
+            for idx, file in enumerate(files):
+                print(f"{idx}: {file.get_file_name()}")
+            print("######################")
+            index = input(">>> ")
+            files = [files[int(index)]]
+
+        if not rtn_file_object:
+            return files[0].get_file_name()
+        else:
+            return files[0]
+
+    def write_out_all_files(self, folder):
+        """Flattens all files in the container and writes them to the specified directory."""
+        for file in self.get_flattened_files():
+            file.write(folder)
diff --git a/icolos/core/containers/perturbation_map.py b/icolos/core/containers/perturbation_map.py
new file mode 100644
index 0000000..da62103
--- /dev/null
+++ b/icolos/core/containers/perturbation_map.py
@@ -0,0 +1,294 @@
+from typing import Dict, List, Optional
+from IPython.lib.display import IFrame
+import pandas as pd
+from icolos.core.containers.compound import Compound, Conformer, Enumeration
+from pyvis.network import Network
+from icolos.core.containers.generic import GenericData
+
+from icolos.utils.enums.step_enums import StepFepPlusEnum
+import os
+from pydantic import BaseModel
+
+
+_SFE = StepFepPlusEnum()
+
+
+class Node(BaseModel):
+    """
+    Container class for the nodes; a wrapper around a Compound object.
+    """
+
+    class Config:
+        arbitrary_types_allowed = True
+
+    node_id: str = None
+    node_hash: str = None
+    conformer: Conformer = Conformer()
+    node_connectivity: List = []
+
+    def __init__(self, **data) -> None:
+        super().__init__(**data)
+
+    def get_node_id(self) -> str:
+        return self.node_id
+
+    def get_node_color(self):
+        # TODO: expand this so we have different colours for each connectivity number [1,10];
+        # this is just a placeholder for now
+        thresholds = {i: "c0affe" for i in range(10)}
+
+        num_connections = len(self.node_connectivity)
+        return thresholds[num_connections]
+
+    def set_node_id(self, node_id: str):
+        self.node_id = node_id
+
+    def get_conformer(self) -> Conformer:
+        return self.conformer
+
+    def set_conformer(self, conformer: Conformer) -> None:
+        self.conformer = conformer
+
+    def get_node_hash(self) -> str:
+        return self.node_hash
+
+    # TODO: add methods here to access connectivity and color attributes
+
+
+class Edge(BaseModel):
+    """
+    Simple container class for the edges in the perturbation map, specified entirely by the connected nodes.
+    """
+
+    class Config:
+        arbitrary_types_allowed = True
+
+    node_from: Node = Node()
+    node_to: Node = Node()
+    total: str = None
+    mcs: str = None
+    chg: str = None
+    softbond: str = None
+    min_no_atoms: str = None
+    snapCoreRmsd: str = None
+    bidirSnapCoreRmsd: str = None
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+    def get_source_node_name(self):
+        return self.node_from.get_node_hash()
+
+    def get_destination_node_name(self):
+        return self.node_to.get_node_hash()
+
+    def get_edge_id(self) -> str:
+        # construct the edge ID from the node hashes, separated by '_'
+        return f"{self.node_from.get_node_hash()}_{self.node_to.get_node_hash()}"
+
+
+class PerturbationMap(BaseModel):
+    """Holds a perturbation map parsed from a file (typically a parsed Schrodinger log
+    file) and provides utility methods for running pmx calculations on the edges."""
the edges""" + + class Config: + arbitrary_types_allowed = True + + nodes: List[Node] = [] + edges: List[Edge] = [] + hash_map: Dict = {} + compounds: List[Compound] = [] + protein: GenericData = None + vmap_output: IFrame = None + replicas: int = 3 + + def __init__(self, **data) -> None: + super().__init__(**data) + + def _get_line_idx(self, data, id_str) -> int: + line = [e for e in data if id_str in e] + assert len(line) == 1 + line = line[0] + return data.index(line) + + def _get_conformer_by_id(self, comp_id: str) -> Optional[Conformer]: + # get the compund object based on the ID in the ligand table (compound names). At this stage in the workflow we have only one conformer per enumeration + try: + # standard icolos naming conventino + parts = comp_id.split(":") + compound_id = parts[0] + enumeration_id = parts[1] + except: + # a non-standard compound name has been used + compound_id = comp_id + for compound in self.compounds: + if compound.get_name().split(":")[0] == compound_id: + rtn_compound = compound + enums = rtn_compound.get_enumerations() + + if len(enums) == 1: + # easy case, there is only one enumeration, return it's single conformer + + # at this stage, the docking poses must have been filtered to a single entry + # per enumeration (an enumerations should have been filtered on charge state etc.) + return enums[0].get_conformers()[0] + else: + # multiple enumerations, must be using Icolos naming or we cannot infer which + # enumeration should be used + enum = rtn_compound.find_enumeration( + enumeration_id=int(enumeration_id) + ) + return enum.get_conformers()[0] + + # TODO: Remove this before integration + print( + f"Compound with id {compound_id} was not found in the map - it may have been lost during map construction" + ) + + def parse_map_file(self, file_path) -> None: + # we need to do some format enforcement here (schrodinger or otherwise) + + with open(file_path, "r") as f: + data = f.readlines() + + start_edge = self._get_line_idx(data, _SFE.EDGE_HEADER_LINE) + start_node = self._get_line_idx(data, _SFE.NODE_HEADER_LINE) + stop_node = self._get_line_idx(data, _SFE.SIMULATION_PROTOCOL) + edge_info_start = self._get_line_idx(data, _SFE.SIMILARITY) + + # TODO: refactor that + # clean up the data from schrodinger + split_data = [] + for line in data: + split_line = line.split(" ") + stripped_line = [] + for element in split_line: + if not element.isspace() and element: + stripped_line.append(element.strip()) + split_data.append(stripped_line) + + data = split_data + + map_info = pd.DataFrame( + data[start_edge + 3 : start_node - 1], + index=None, + columns=[ + "Short ID", + "ligand1 -> ligand2", + "Bennett ddG", + "Cycle Closure ddG", + "Complex dG", + "Solvent dG", + ], + ) + node_info = pd.DataFrame( + data[start_node + 3 : stop_node - 1], + index=None, + columns=[ + "hash_id", + "node_id", + "Predicted dG", + "Experimental dG", + "Predicted Solvation dG", + "Experimental Solvation dG", + ], + ) + edge_info = pd.DataFrame( + data[edge_info_start + 3 : -1], + columns=[ + "Short ID", + "Total", + "Mcs", + "Charge", + "SoftBond", + "MinimumNumberOfAtom", + "SnapCoreRmsd", + "BidirectionSnapCore", + ], + ).dropna() + for hash_id, node_id in zip(node_info["hash_id"], node_info["node_id"]): + # map the hashes to the compound IDs + self.hash_map[hash_id] = node_id + node = Node( + node_id=node_id, + node_hash=hash_id, + conformer=self._get_conformer_by_id(node_id), + ) + # generate the Node object to wrap the compound + self.nodes.append(node) + + for _, edge in 
+        for _, edge in edge_info.iterrows():
+            edge = Edge(
+                node_from=self._get_node_by_hash_id(edge[0].split("_")[0]),
+                node_to=self._get_node_by_hash_id(edge[0].split("_")[1]),
+                total=edge[1],
+                mcs=edge[2],
+                chg=edge[3],
+                softbond=edge[4],
+                min_no_atoms=edge[5],
+                snapCoreRmsd=edge[6],
+                bidirSnapCoreRmsd=edge[7],
+            )
+            self.edges.append(edge)
+        # process the node info: attach the connectivity to each node
+        for node in self.nodes:
+            self._attach_node_connectivity(node)
+
+    def _attach_node_connectivity(self, node: Node):
+        # looks through the constructed edges and attaches the ids of any edges
+        # that have the specified node as one endpoint
+        connected_edges = []
+        for edge in self.edges:
+            if (
+                edge.node_from.get_node_hash() == node.node_hash
+                or edge.node_to.get_node_hash() == node.node_hash
+            ):
+                connected_edges.append(edge.get_edge_id())
+        node.node_connectivity = connected_edges
+
+    def _get_node_by_node_id(self, node_id: str) -> Node:
+        for node in self.nodes:
+            if node.node_id == node_id:
+                return node
+
+    def _get_node_by_hash_id(self, hash_id: str) -> Node:
+        for node in self.nodes:
+            if node.node_hash == hash_id:
+                return node
+
+    def get_edges(self) -> List[Edge]:
+        return self.edges
+
+    def get_nodes(self) -> List[Node]:
+        return self.nodes
+
+    def visualise_perturbation_map(self, write_out_path: str) -> None:
+        """Method for visualising the map with pyvis.Network."""
+        vmap = Network(directed=True)
+        vmap.barnes_hut()
+
+        for edge in self.edges:
+            vmap.add_node(
+                edge.get_source_node_name(), color=edge.node_from.get_node_color()
+            )
+            vmap.add_node(
+                edge.get_destination_node_name(), color=edge.node_to.get_node_color()
+            )
+            vmap.add_edge(
+                source=edge.get_source_node_name(),
+                to=edge.get_destination_node_name(),
+                length=edge.total,
+                label="total: " + str(edge.total),
+                title="Mcs: " + str(edge.mcs) + ", SnapCoreRMSD: " + str(edge.snapCoreRmsd),
+            )
+        self.vmap_output = vmap.show(os.path.join(write_out_path, "vmap.html"))
+
+    def get_protein(self) -> GenericData:
+        return self.protein
+
+    def __repr__(self) -> str:
+        return f"Icolos Perturbation Map object containing {len(self.edges)} edges and {len(self.nodes)} nodes"
+
+    def __str__(self) -> str:
+        return self.__repr__()
diff --git a/icolos/core/flow_control/__init__.py b/icolos/core/flow_control/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/icolos/core/flow_control/flow_control.py b/icolos/core/flow_control/flow_control.py
new file mode 100644
index 0000000..32377b9
--- /dev/null
+++ b/icolos/core/flow_control/flow_control.py
@@ -0,0 +1,68 @@
+from typing import List
+from pydantic import BaseModel, PrivateAttr
+from icolos.core.workflow_steps.step import StepSettingsParameters
+from icolos.core.workflow_steps.step import StepBase
+from icolos.loggers.steplogger import StepLogger
+from icolos.core.workflow_steps.step import (
+    StepData,
+    StepInputParameters,
+    StepWriteoutParameters,
+    StepExecutionParameters,
+)
+from icolos.utils.enums.step_enums import StepBaseEnum
+from icolos.utils.enums.step_initialization_enum import StepInitializationEnum
+from icolos.utils.general.convenience_functions import nested_get
+
+_SIE = StepInitializationEnum()
+
+
+class BaseStepConfig(BaseModel):
+    """
+    Minimal template class for the base config, without the additional machinery that StepBase carries.
+    """
+
+    step_id: str = None
+    work_dir: str = None
+    type: str = None
+    data: StepData = StepData()
+    input: StepInputParameters = StepInputParameters()
+    writeout: List[StepWriteoutParameters] = []
+    execution: StepExecutionParameters = StepExecutionParameters()
+    settings: StepSettingsParameters = StepSettingsParameters()
+
+    def _as_dict(self):
+        return {
+            "step_id": self.step_id,
+            "type": self.type,
+            "execution": self.execution,
+            "settings": self.settings,
+            "work_dir": self.work_dir,
+            "data": self.data,
+            "input": self.input,
+            "writeout": self.writeout,
+        }
+
+
+class FlowControlBase(BaseModel):
+    # list of steps to be iterated over; each set needs its inputs chained together
+    base_config: List[BaseStepConfig] = None
+    initialized_steps: List[StepBase] = None
+    _logger = PrivateAttr()
+
+    def __init__(self, **data) -> None:
+        super().__init__(**data)
+        self._logger = StepLogger()
+
+    def _initialize_step_from_dict(self, step_conf: dict):
+        # TODO: check if this overlaps with the other "initialize_step_from_dict" method;
+        # a separate initialisation method is required here to avoid a circular import
+        _STE = StepBaseEnum
+
+        step_type = nested_get(step_conf, _STE.STEP_TYPE, default=None)
+        step_type = None if step_type is None else step_type.upper()
+        if step_type in _SIE.STEP_INIT_DICT.keys():
+            return _SIE.STEP_INIT_DICT[step_type](**step_conf)
+        else:
+            raise ValueError(
+                f"Backend for step {nested_get(step_conf, _STE.STEPID, '')} unknown."
+            )
diff --git a/icolos/core/flow_control/iterator.py b/icolos/core/flow_control/iterator.py
new file mode 100644
index 0000000..63781fa
--- /dev/null
+++ b/icolos/core/flow_control/iterator.py
@@ -0,0 +1,223 @@
+from typing import Dict, List, Union
+from pydantic import BaseModel
+
+from icolos.core.flow_control.flow_control import BaseStepConfig, FlowControlBase
+from copy import deepcopy
+from icolos.core.job_control.job_control import StepJobControl
+from icolos.core.workflow_steps.step import _LE
+from icolos.utils.enums.step_enums import StepBaseEnum
+from icolos.core.workflow_steps.step import StepBase
+from icolos.utils.enums.step_enums import IteratorEnum
+import os
+
+_IE = IteratorEnum
+_SBE = StepBaseEnum
+
+
+class IterSettingsParameters(BaseModel):
+    # unpacked version of StepSettingsParameters
+    flags: List = []
+    parameters: Dict = {}
+    additional: Dict = {}
+
+
+class IterParallelizer(BaseModel):
+    # if True, the steps must be totally independent; the iterator step then hands
+    # the generated copies over to a StepJobControl instance for parallel execution
+    parallelize: bool = False
+    cores: int = 1
+    dependent_steps: int = None
+
+
+class IterSettings(BaseModel):
+    # dictionary of settings to change per iteration,
+    # of the form {step_id: IterSettingsParameters}
+    settings: Dict[str, IterSettingsParameters] = {}
+    iter_mode: _IE = _IE.N_ITERS
+    n_iters: int = None
+    parallelizer_settings: IterParallelizer = IterParallelizer()
+
+
+class StepIterator(FlowControlBase, BaseModel):
+    """
+    Implements the iterator mechanism:
+    wraps one or multiple steps and generates n copies of that set of steps according to the iterator settings.
+    Becomes the master job when parallelize=True, using Icolos JobControl to interface with external resources.
+    """
+
+    # holds the dict of iterables for the bits to change
+    iter_settings: IterSettings = IterSettings()
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        # when init_step_from_dict calls this method, we need to initialise a list of steps,
+        # controlled by iter_settings.iter_mode: either generate a list (serial execution),
+        # or initialize a single JobControl step with each config as an initialized step
+        self.initialized_steps = self._initialize_steps()
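+
+    # A hypothetical config sketch (key names follow the models above, all values
+    # illustrative): three repeats of the wrapped steps, run as three parallel queues:
+    #
+    #   "iter_settings": {
+    #       "n_iters": 3,
+    #       "parallelizer_settings": {
+    #           "parallelize": true, "cores": 3, "dependent_steps": 2
+    #       }
+    #   }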
+    def _initialize_n_iters(self) -> List:
+        """
+        Initialise n identical copies of the same step config.
+        """
+        init_steps = []
+        for i in range(self.iter_settings.n_iters):
+
+            list_step_conf = deepcopy(self.base_config)
+
+            # hand all steps over to the config updater
+            formatted_confs = self._update_config(list_step_conf, f"run_{i}")
+            for step_conf in formatted_confs:
+                initialized_step = self._initialize_step_from_dict(step_conf._as_dict())
+                init_steps.append(initialized_step)
+        return init_steps
+
+    def _modify_settings(self, settings, step_config, i: int):
+        base_conf = deepcopy(step_config)
+        iter_settings = deepcopy(settings)
+        # apply the i-th entry of each settings list for this iteration
+        if iter_settings.flags:
+            base_conf.settings.arguments.flags.append(iter_settings.flags[i])
+        for (
+            key,
+            val,
+        ) in iter_settings.parameters.items():
+            base_conf.settings.arguments.parameters[key] = val[i]
+        for (
+            key,
+            val,
+        ) in iter_settings.additional.items():
+            # each value holds a list with one entry per iteration
+            base_conf.settings.additional[key] = val[i]
+
+        return base_conf
+
+    def _initialize_single(self) -> List:
+        """
+        Iterate through all settings step-wise, changing all setting blocks simultaneously and
+        returning n initialised step sets for n iterations.
+        """
+        init_steps = []
+        for i in range(self.iter_settings.n_iters):
+
+            # iterate over the steps in the base config, and the corresponding settings, if these are to be modified
+            step_sublist = []
+            for step_config in self.base_config:
+
+                # check if we need to iterate through settings in this step, else just use the base config
+                if step_config.step_id in self.iter_settings.settings.keys():
+                    settings = self.iter_settings.settings[step_config.step_id]
+                    step_sublist.append(self._modify_settings(settings, step_config, i))
+                else:
+                    step_sublist.append(step_config)
+
+            # update all configs with references to updated step_ids etc.
+            formatted_configs = self._update_config(step_sublist, f"run_{i}")
+            for step_conf in formatted_configs:
+                initialized_step = self._initialize_step_from_dict(step_conf._as_dict())
+                init_steps.append(initialized_step)
+        return init_steps
+
+    # def _initialize_compounds(self):
+    #     """
+    #     Generates n copies of a step, each with a single compound loaded from the source step
+    #     * Only the first step in base_config needs updating, downstream data handover from this step is handled properly anyway
+    #     """
+    #     init_steps = []
+    #     # TODO: get the number of compounds automatically?
+    #     for i in range(self.iter_settings.n_iters):
+    #         list_step_conf = deepcopy(self.base_config)
+    #         first_step_config = list_step_conf[0]
+    #         # probably only expecting one set of input compounds but this will select the ith for all inputs
+    #         for inp_block in first_step_config.input.compounds:
+    #             inp_block.selected_compound_id = i
+    #         formatted_confs = self._update_config(list_step_conf, f"run_{i}")
+    #         for step_conf in formatted_confs:
+    #             initialized_step = self._initialize_step_from_dict(step_conf.as_dict())
+    #             init_steps.append(initialized_step)
+    #     return init_steps
+
+    def _initialize_steps(self) -> Union[List, StepBase]:
+        """
+        Handle step initialisation according to the config.
+        Returns a list of steps for serial execution (the default), or a Step-like
+        JobControl object if parallelisation is specified.
+        """
+ """ + steps = [] + if self.iter_settings.iter_mode == _IE.N_ITERS: + # simplest mode, just n repeats of the same step + steps += self._initialize_n_iters() + + elif self.iter_settings.iter_mode == _IE.SINGLE: + # for n different settings, iterate through each, returning n steps + steps += self._initialize_single() + elif self.iter_settings.iter_mode == _IE.ALL: + raise NotImplementedError + # initialise all combinations of steps by combining settings + # steps.append(self._initialize_combined()) + + self._logger.log( + f"Iterator has initialized {len(steps)} steps for step {self.base_config[0].step_id}", + _LE.DEBUG, + ) + if not self.iter_settings.parallelizer_settings.parallelize: + return steps + else: + + wrapper = StepJobControl( + step_id="JobControl", + type=_SBE.STEP_JOB_CONTROL, + initialized_steps=steps, + parallel_execution=self.iter_settings.parallelizer_settings, + ) + return wrapper + + def _update_config( + self, step_conf: List[BaseStepConfig], run_id: str + ) -> List[BaseStepConfig]: + """ + Manages modifications to each step in the config: + * step_id is updated with the run_id + * any references to other step_ids (e.g. in input) contained in the base config are updated to reflect the change + * writeout paths are updated to separate output from each of the runs + """ + original_step_ids = [conf.step_id for conf in step_conf] + formatted_confs = [] + for conf in step_conf: + # modify the step_id + st_id = conf.step_id + conf.step_id = st_id + "_" + run_id + # modify the writeout paths: add a key_value dir the writeout path + for idx, block in enumerate(conf.writeout): + if block.destination.resource is not None: + resource = block.destination.resource + parts = resource.split("/") + new_resource = os.path.join("/".join(parts[:-1]), run_id, parts[-1]) + block.destination.resource = new_resource + + # modify the step_input blocks if they reference a step_id contained in step_conf + # treat compounds + for comp in conf.input.compounds: + if comp.source in original_step_ids: + comp.source += f"_{run_id}" + + for gen in conf.input.generic: + if gen.source in original_step_ids: + gen.source += f"_{run_id}" + + # TODO: this is a bodge for now + # we have an edge case in data manipularion that needs to match compounds those from another step, the source name needs the same treatment + if conf.type.upper() == _SBE.STEP_DATA_MANIPULATION: + if ( + _SBE.INPUT_SOURCE in conf.settings.additional.keys() + and conf.settings.additional[_SBE.INPUT_SOURCE] in original_step_ids + ): + + conf.settings.additional[_SBE.INPUT_SOURCE] += f"_{run_id}" + + formatted_confs.append(conf) + + return formatted_confs diff --git a/icolos/core/job_control/__init__.py b/icolos/core/job_control/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/job_control/job_control.py b/icolos/core/job_control/job_control.py new file mode 100644 index 0000000..5d4cf68 --- /dev/null +++ b/icolos/core/job_control/job_control.py @@ -0,0 +1,89 @@ +from typing import List +from pydantic.main import BaseModel + +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.general.parallelization import Parallelizer, SubtaskContainer +from icolos.core.workflow_steps.step import _LE + + +class IterParallelizer(BaseModel): + # config block controlling how the steps are parallelized + # if you are executing a 5 step workflow with 10 repeats, dependent_steps = 5, cores = 10 + # this will allow each independent replica to be allocated to a single job queue, retaining step order + 
parallelize: bool = False + cores: int = 1 + dependent_steps: int = None + + +class StepJobControl(StepBase, BaseModel): + """ + Step class containing the job control functionality required for StepIterator; supports Slurm for job scheduling. + Supports running the Icolos process as a master job for parallel step execution on a cluster. Generates a pool of initialized steps to be executed, based on the parallel_execution settings handed over from flow control. + """ + + initialized_steps: List = [] + # expect the parallel execution block to be handed over from flow control + parallel_execution: IterParallelizer = IterParallelizer() + + def __init__(self, **data): + super().__init__(**data) + + def _prepare_batch(self, batch) -> List[List[StepBase]]: + + batch_steps = [] + for sublist in batch: + sublist_steps = [] + for task in sublist: + sublist_steps.append(task.data) + batch_steps.append(sublist_steps) + return batch_steps + + def execute(self): + """ + Execute multiple steps in parallel + """ + # Spin up multiple processes + self.execution.parallelization.cores = self.parallel_execution.cores + # each subtask needs to contain an entire mini workflow to be executed sequentially, + self.execution.parallelization.max_length_sublists = ( + self.parallel_execution.dependent_steps + ) + + # steps are only tried once: with multiple tries, a step can fail simply because a step + # it depends on failed earlier in the chain - too complicated to handle + self._subtask_container = SubtaskContainer(max_tries=1) + self._subtask_container.load_data(self.initialized_steps) + + parallelizer = Parallelizer(func=self._run_step) + n = 1 + + while self._subtask_container.done() is False: + + next_batch = self._get_sublists( + get_first_n_lists=self.parallel_execution.cores + ) # return n lists of length max_sublist_length + _ = [sub.increment_tries() for element in next_batch for sub in element] + _ = [sub.set_status_failed() for element in next_batch for sub in element] + + self._logger.log( + f"Starting {len(next_batch)} parallel jobs under Icolos JobControl, execution batch {n}", + _LE.INFO, + ) + + steps = self._prepare_batch(next_batch) + + result = parallelizer.execute_parallel(steps=steps) + + # successful execution of each step is not explicitly checked; + # the step is responsible for throwing errors if something has gone wrong + for task in next_batch: + for subtask in task: + subtask.set_status_success() + # keep the batch counter in sync for logging + n += 1 + + def _run_step(self, steps: List[StepBase]): + # submits and monitors each step in the sublist + for step in steps: # length max_len_sublist + # at this point the internal steps don't have their data initialised + step.generate_input() + step.execute() + step.process_write_out() diff --git a/icolos/core/step_utils/__init__.py b/icolos/core/step_utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/step_utils/input_merger.py b/icolos/core/step_utils/input_merger.py new file mode 100644 index 0000000..ecc3566 --- /dev/null +++ b/icolos/core/step_utils/input_merger.py @@ -0,0 +1,114 @@ +from copy import deepcopy +from typing import List, Dict +from pydantic import BaseModel + +from icolos.core.containers.compound import Enumeration, Compound +from icolos.utils.enums.step_enums import StepBaseEnum + +_SBE = StepBaseEnum + + +class StepMerge(BaseModel): + compounds: bool = True + enumerations: bool = False + merge_compounds_by: str = _SBE.INPUT_MERGE_BY_NAME + merge_enumerations_by: str = _SBE.INPUT_MERGE_BY_ID + + +class InputMerger: + def __init__(self, config: StepMerge): + self.config = config + + def _group_enumerations( + self, enumerations: List[Enumeration], by + ) -> Dict[str, 
List[Enumeration]]: + if by == _SBE.INPUT_MERGE_BY_SMILE: + grouped = {enumeration.get_smile(): [] for enumeration in enumerations} + for enum in enumerations: + grouped[enum.get_smile()].append(enum) + elif by == _SBE.INPUT_MERGE_BY_ID: + grouped = { + str(enumeration.get_enumeration_id()): [] + for enumeration in enumerations + } + for enum in enumerations: + grouped[str(enum.get_enumeration_id())].append(enum) + else: + raise NotImplementedError + return grouped + + def _group_compounds( + self, compounds: List[Compound], by + ) -> Dict[str, List[Compound]]: + if by == _SBE.INPUT_MERGE_BY_NAME: + names = {compound.get_name(): [] for compound in compounds} + for compound in compounds: + names[compound.get_name()].append(compound) + elif by == _SBE.INPUT_MERGE_BY_ID: + names = {str(compound.get_compound_number()): [] for compound in compounds} + for compound in compounds: + names[str(compound.get_compound_number())].append(compound) + else: + raise NotImplementedError + return names + + def _merge_enumerations( + self, enumerations: List[Enumeration], by + ) -> List[Enumeration]: + list_result = [] + + # note that if it has been grouped by ID, the first (arbitrary) smile is used + for _, enum_list in self._group_enumerations(enumerations, by).items(): + enum_combined = deepcopy(enum_list[0]) + enum_combined.clear_conformers() + for enum in enum_list: + enum_combined.add_conformers( + deepcopy(enum.get_conformers()), auto_update=False + ) + list_result.append(enum_combined) + return list_result + + def unroll_compounds(self, compounds: list) -> List[Compound]: + list_buffer = [] + for ele in compounds: + if isinstance(ele, list): + list_buffer = list_buffer + self.unroll_compounds(ele) + elif isinstance(ele, Compound): + list_buffer.append(ele) + return list_buffer + + def merge(self, compounds: List[Compound]) -> List[Compound]: + list_result = [] + + # if selected, combine compounds into one depending on the strategy + if self.config.compounds: + dict_grouped = self._group_compounds( + compounds, self.config.merge_compounds_by + ) + number = 0 + for name, compound_list in dict_grouped.items(): + # add the enumerations of all compounds together but do NOT auto-update yet (because enumerations might + # also be merged later on) + comp_combined = Compound(name=name, compound_number=number) + for comp in compound_list: + comp_combined.add_enumerations( + deepcopy(comp.get_enumerations()), auto_update=False + ) + + # as merging of enumerations only makes sense when there was a compound merge, keep + # it on that indentation level + if self.config.enumerations: + enumerations = self._merge_enumerations( + deepcopy(comp_combined.get_enumerations()), + self.config.merge_enumerations_by, + ) + comp_combined.clear_enumerations() + comp_combined.add_enumerations(enumerations, auto_update=False) + + # now, rename the enumerations and conformers + comp_combined.reset_all_ids() + comp_combined.update_all_relations() + + list_result.append(comp_combined) + number += 1 + return list_result diff --git a/icolos/core/step_utils/input_preparator.py b/icolos/core/step_utils/input_preparator.py new file mode 100644 index 0000000..7c4da81 --- /dev/null +++ b/icolos/core/step_utils/input_preparator.py @@ -0,0 +1,535 @@ +from icolos.core.containers.generic import GenericContainer, GenericData +import json +import pandas as pd +from rdkit import Chem + +from icolos.loggers.base_logger import BaseLogger +from icolos.utils.enums.input_enums import InputEnum +from icolos.utils.enums.logging_enums import 
LoggingConfigEnum +from icolos.utils.enums.write_out_enums import WriteOutEnum +from icolos.utils.general.icolos_exceptions import StepFailed +from icolos.utils.smiles import to_smiles +from icolos.utils.general.files_paths import infer_input_type + +from typing import List, Any +from pydantic import BaseModel + +from icolos.core.step_utils.input_merger import InputMerger, StepMerge +from icolos.core.containers.compound import Enumeration, Compound, Conformer +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +import os +from tempfile import mkdtemp +import requests + +_SBE = StepBaseEnum +_LE = LoggingConfigEnum() +_WE = WriteOutEnum() +_SGE = StepGromacsEnum() +_IE = InputEnum() + + +class StringPath(str): + def __new__(cls, content): + return super().__new__(cls, content) + + +class StringFile(str): + def __new__(cls, content): + return super().__new__(cls, content) + + +class StepData(BaseModel): + class Config: + arbitrary_types_allowed = True + + compounds: List[Compound] = [] + generic: GenericContainer = GenericContainer() + + +class StepCSVInputColumnParameters(BaseModel): + smiles: str + names: str = None + + +class StepInputEnforceIDs(BaseModel): + compound_ids: List = None + enumeration_ids: List = None + + +class StepInputSource(BaseModel): + source: str + source_type: str = None + source_field: str = _IE.SOURCE_FIELD_COMPOUNDS + target_field: str = _IE.SOURCE_FIELD_COMPOUNDS + extension: str = None + format: str = None + delimiter: str = "," + columns: StepCSVInputColumnParameters = None + enforce_ids: StepInputEnforceIDs = None + + +class StepInputParameters(BaseModel): + compounds: List[StepInputSource] = [] + generic: List[StepInputSource] = [] + perturbation_map: List[StepInputSource] = None + merge: StepMerge = StepMerge() + work_dir: str = None + + +class InputPreparator(BaseModel): + workflow: Any = None + logger: BaseLogger = None + + class Config: + underscore_attrs_are_private = True + arbitrary_types_allowed = True + + def __init__(self, **data): + super().__init__(**data) + + def generate_input(self, step_input: StepInputParameters, step_type): + compounds = self._generate_compound_input(step_input) + generic = ( + self._generate_generic_input(step_input, step_type) + if step_input.generic + else GenericContainer() + ) + # Instruct the step to run in a specific workdir, e.g. 
from a previously failed job or to execute a few related steps in the same dir + if step_input.work_dir is not None: + if os.path.isdir(step_input.work_dir): + work_dir = step_input.work_dir + self.logger.log( + f"Found specified work dir at {step_input.work_dir}", _LE.DEBUG + ) + # now check whether this needs attaching to the workflow for the rest of the steps + if self.workflow.header.global_settings.single_directory: + self.workflow.workflow_data.work_dir = work_dir + self.logger.log( + f"Setting workdir at {step_input.work_dir} to the workflow's workdir", + _LE.DEBUG, + ) + else: + # last resort, if a previous step_id has been passed, get the work_dir from there + + work_dir = self.workflow.find_step_by_step_id( + step_input.work_dir + ).work_dir + elif ( + self.workflow is not None + and self.workflow.header.global_settings.single_directory + ): + # Entire workflow running in a single dir (pmx), either generate one for the first + # step or use the already generated dir + work_dir = self._get_workflow_workdir() + else: + work_dir = None + return ( + StepData(compounds=compounds, generic=generic), + work_dir, + ) + + def _get_workflow_workdir(self): + # check whether the workflow already has one attached, otherwise create one + if self.workflow.workflow_data.work_dir is not None and os.path.isdir( + self.workflow.workflow_data.work_dir + ): + return self.workflow.workflow_data.work_dir + else: + tmp_dir = mkdtemp() + self.workflow.workflow_data.work_dir = tmp_dir + self.logger.log(f"Set workflow's tmpdir to {tmp_dir}", _LE.DEBUG) + return tmp_dir + + def _generate_compound_input(self, step_input: StepInputParameters) -> List: + compounds = [] + for inp in step_input.compounds: + if inp.target_field == _IE.TARGET_FIELD_COMPOUNDS: + buffer = [] + if inp.source_type == _SBE.INPUT_SOURCE_TYPE_FILE: + buffer.append(self._read_compound_input_from_file(inp)) + elif inp.source_type == _SBE.INPUT_SOURCE_TYPE_STEP: + buffer.append(self._read_compound_input_from_step(inp)) + elif inp.source_type == _SBE.INPUT_SOURCE_TYPE_STRING: + buffer.append(self._read_input_from_string(inp)) + else: + raise ValueError( + f"Source type {inp.source_type} for compound input unsupported - abort." + ) + if inp.target_field == _IE.SOURCE_FIELD_COMPOUNDS: + # note: no unrolling here! + compounds = compounds + buffer + + elif inp.target_field == _IE.TARGET_FIELD_CONFORMERS: + if inp.source_type == _SBE.INPUT_SOURCE_TYPE_FILE: + compounds = compounds + self._read_conformers_input_from_file(inp) + if len(compounds) > 0: + compounds = self._apply_compound_merger(step_input, compounds) + return compounds + + def _generate_generic_input( + self, step_input: StepInputParameters, step_type + ) -> GenericContainer: + generic = GenericContainer() + for inp in step_input.generic: + files = self._read_data_to_generic(inp) + generic.add_files(files) + return generic + + def _read_data_to_generic(self, inp: StepInputSource): + ext = inp.extension + if inp.source_type == _SBE.INPUT_SOURCE_TYPE_FILE or os.path.isfile(inp.source): + assert os.path.isfile(inp.source) + try: + with open(inp.source, "r") as f: + data = f.read() + except UnicodeDecodeError: + with open(inp.source, "rb") as f: + data = f.read() + file = GenericData(inp.source.split("/")[-1], data) + return [file] + elif inp.source_type == _SBE.INPUT_SOURCE_TYPE_URL or inp.source.startswith( + "http" + ): + data = self._get_pdb_file_from_api(inp.source) + file_name = inp.source.split("/")[-1].split(".")[0] + "." 
+ inp.extension + file = GenericData(file_name=file_name, file_data=data) + return [file] + elif inp.source_type == _SBE.INPUT_SOURCE_TYPE_DIR or os.path.isdir(inp.source): + assert os.path.isdir(inp.source) + file = GenericData( + file_data=inp.source, + file_name=inp.source.split("/")[-1], + extension=inp.extension, + ) + return [file] + else: + # fall back on step source type + input_step = self.workflow.find_step_by_step_id(inp.source) + files = input_step.data.generic.get_files_by_extension(ext) + + # special case for itp and ndx files, these are included in the topol file so are never arguments + if ext in ["itp", "ndx"]: + return files + + if len(files) == 1: + file = files[0] + # file.set_argument() + return [file] + # else use the argument method + else: + # this introduces a manual check on which file the user wants if there are multiple + file = input_step.data.generic.get_argument_by_extension( + ext, rtn_file_object=True + ) + return [file] + + def _get_pdb_file_from_api(self, pdb_url: str): + response = self._get_request(pdb_url) + if response is None or not response.ok: + return None + return response.text + + def _get_request(self, url, max_tries=5): + trials = 0 + while trials < max_tries: + response = requests.get(url) + if response.status_code == 200: + return response + trials += 1 + # no successful response within max_tries attempts + return None + + def _apply_compound_merger( + self, step_input: StepInputParameters, compounds: List[Compound] + ) -> List[Compound]: + merger = InputMerger(step_input.merge) + compounds = merger.unroll_compounds(compounds) + if not any( + [ + True + for compound in step_input.compounds + if compound.enforce_ids is not None + ] + ): + compounds = merger.merge(compounds=compounds) + + if len(compounds) == 0 and self.logger is not None: + self.logger.log( + "Input list of compounds is empty, this is likely an error.", + _LE.WARNING, + ) + return compounds + + def _read_compound_input_from_step(self, inp: StepInputSource): + input_step = self.workflow.find_step_by_step_id(inp.source) + return input_step.clone_compounds() + + def _read_conformers_input_from_file(self, inp: StepInputSource): + # set up path to input file and extract the input format + input_format = inp.format + if input_format is None and self.logger is not None: + self.logger.log( + "No input format specified, will try to infer type (not recommended).", + _LE.WARNING, + ) + input_format = infer_input_type(inp.source) + input_format = input_format.upper() + + # call the respective loading function + if input_format == _SBE.FORMAT_SDF: + compound = Compound(compound_number=0) + enumeration = Enumeration() + for mol_id, mol in enumerate( + Chem.SDMolSupplier(inp.source, removeHs=False) + ): + conformer = Conformer(conformer=mol, enumeration_object=enumeration) + enumeration.add_conformer(conformer=conformer, auto_update=True) + compound.add_enumeration(enumeration, auto_update=True) + return [compound] + else: + raise ValueError( + f"At the moment, input format {input_format} is not supported." 
+ ) + + def _read_compound_input_from_file(self, inp: StepInputSource): + # set up path to input file and extract the input format + input_format = inp.format + if input_format is None and self.logger is not None: + self.logger.log( + "No input format specified, will try to infer type (not recommended).", + _LE.WARNING, + ) + input_format = infer_input_type(inp.source) + input_format = input_format.upper() + + # call the respective loading function + if input_format == _SBE.FORMAT_SDF: + result = self._read_in_SDF_file(inp) + elif input_format == _SBE.FORMAT_CSV: + result = self._read_in_CSV_file(inp) + elif input_format == _SBE.FORMAT_SMI: + result = self._read_in_SMI_file(inp) + elif input_format == _SBE.FORMAT_JSON: + result = self._read_in_JSON_file(inp) + else: + raise ValueError( + f"At the moment, input format {input_format} is not supported." + ) + + # apply ID enforcement, if specified + return self._enforce_ids(result, inp) + + def _read_input_from_string(self, inp: StepInputSource) -> List[Compound]: + # the strings must be separated by a semi-colon (';'); they may have names in front separated by a colon (':') + elements = inp.source.split(";") + list_compounds = [] + for line_id, line in enumerate(elements): + # it could be that names are part of the elements; otherwise use the number + # remove trailing or preceding white spaces + parts = [x.strip() for x in line.split(":")] + if len(parts) == 2: + compound = Compound(name=parts[0], compound_number=line_id) + smile = parts[1] + else: + compound = Compound(name=str(line_id), compound_number=line_id) + smile = parts[0] + enumeration = Enumeration(smile=smile, original_smile=smile) + compound.add_enumeration(enumeration, auto_update=True) + list_compounds.append(compound) + + # apply ID enforcement, if specified + return self._enforce_ids(list_compounds, inp) + + def _read_in_SDF_file(self, inp: StepInputSource) -> List[Compound]: + def _get_existing_enumeration(comp_id, enum_id): + comp = _get_existing_compound(comp_id) + for enum in comp.get_enumerations(): + if enum.get_enumeration_id() == int(enum_id): + return enum + raise ValueError + + def _get_existing_compound(idx): + for comp in list_compounds: + if int(idx) == comp.get_compound_number(): + return comp + raise ValueError + + list_compounds = [] + compound_number = 0 + icolos_naming = True + # Parses compounds following the Icolos naming convention of Compound:Enumeration:Conformer to reconstruct the compound object + for mol in Chem.SDMolSupplier(inp.source, removeHs=False): + new_compound = False + new_enumeration = False + mol_name = mol.GetProp(_WE.RDKIT_NAME) + # assuming the mol name follows Icolos conventions + try: + id_parts = mol_name.split(":") + comp_id = id_parts[0] + enum_id = id_parts[1] + + except IndexError: + # the name does not follow the Icolos convention + icolos_naming = False + comp_id = mol_name + enum_id = 0 + + if icolos_naming: + # reconstruct compound objects + try: + # try to find an existing compound with the correct name + compound = _get_existing_compound(idx=comp_id) + except ValueError: + # the compound does not yet exist, create the object + new_compound = True + try: + # if we have standard icolos compound naming + comp_num = int(comp_id) + except ValueError: + # some other naming scheme + comp_num = compound_number + compound = Compound(name=comp_id, compound_number=comp_num) + try: + # check whether the enumeration exists + enumeration = _get_existing_enumeration(comp_id, enum_id) + except ValueError: + new_enumeration = True + enumeration = Enumeration( + smile=to_smiles(mol), 
molecule=mol, + original_smile=to_smiles(mol), + ) + + if len(id_parts) == 3: + # i.e. 0:0:0, we have a conformer + conf = Conformer( + conformer=mol, + enumeration_object=enumeration, + conformer_id=int(id_parts[2]), + ) + enumeration.add_conformer(conf, auto_update=True) + if new_enumeration: + compound.add_enumeration(enumeration, auto_update=True) + if new_compound: + list_compounds.append(compound) + + else: + # if non-standard naming conventions, simply load each mol into a new compound object, with single enum/conf + compound = Compound(name=comp_id, compound_number=compound_number) + enum = Enumeration( + smile=to_smiles(mol), + molecule=mol, + original_smile=to_smiles(mol), + enumeration_id=0, + ) + enum.add_conformer( + Conformer(conformer=mol, enumeration_object=enum, conformer_id=0), + auto_update=True, + ) + compound.add_enumeration(enumeration=enum) + list_compounds.append(compound) + + compound_number += 1 + return list_compounds + + def _read_in_SMI_file(self, inp: StepInputSource) -> List[Compound]: + list_compounds = [] + with open(inp.source, "r") as f: + # while the SMI file definition requires a name (separated by blanks) on each line + # as well, assume that it might not be present + lines = [line.rstrip() for line in f.readlines()] + for line_id, line in enumerate(lines): + if line == "": + continue + + parts = line.split() + if len(parts) == 2: + compound = Compound(name=parts[1], compound_number=line_id) + else: + compound = Compound(name=str(line_id), compound_number=line_id) + enumeration = Enumeration(smile=parts[0], original_smile=parts[0]) + compound.add_enumeration(enumeration, auto_update=True) + list_compounds.append(compound) + return list_compounds + + def _read_in_JSON_file(self, inp: StepInputSource) -> List[Compound]: + list_compounds = [] + + # load input + with open(inp.source, "r") as f: + inp_json = f.read().replace("\r", "").replace("\n", "") + inp_dict = json.loads(inp_json) + + comp_id = 0 + for name, smile in zip(inp_dict[_IE.JSON_NAMES], inp_dict[_IE.JSON_SMILES]): + compound = Compound(name=name, compound_number=comp_id) + enumeration = Enumeration(smile=smile, original_smile=smile) + compound.add_enumeration(enumeration, auto_update=True) + list_compounds.append(compound) + comp_id += 1 + + return list_compounds + + def _read_in_CSV_file(self, inp: StepInputSource) -> List[Compound]: + list_compounds = [] + delimiter = inp.delimiter + data = pd.read_csv(inp.source, delimiter=delimiter) + + smiles_column = inp.columns.smiles + if smiles_column not in list(data.columns): + raise StepFailed( + f"Column name for the smiles either not set or not found in input CSV." + ) + + # deal with names (if specified) + names_column = inp.columns.names + if names_column is None: + names_compounds = None + else: + if names_column not in list(data.columns): + raise StepFailed( + f"Specified column name ({names_column}) for the names not found in input CSV." 
+ ) + else: + names_compounds = [ + str(name).strip() for name in data[names_column].tolist() + ] + + # build the compounds + smiles = [str(line).strip() for line in data[smiles_column].tolist()] + for number in range(len(smiles)): + if names_compounds is not None: + compound = Compound( + name=names_compounds[number], compound_number=number + ) + else: + compound = Compound(name=str(number), compound_number=number) + enumeration = Enumeration( + smile=smiles[number], original_smile=smiles[number] + ) + compound.add_enumeration(enumeration, auto_update=True) + list_compounds.append(compound) + return list_compounds + + def _enforce_ids( + self, compounds: List[Compound], inp: StepInputSource + ) -> List[Compound]: + if inp.enforce_ids is not None: + if inp.enforce_ids.compound_ids is not None: + for comp_idx, comp in enumerate(compounds): + comp.set_compound_number( + int(inp.enforce_ids.compound_ids[comp_idx]) + ) + + # set enumeration ids + enum_id_idx = 0 + if inp.enforce_ids.enumeration_ids is not None: + for comp in compounds: + for enum in comp.get_enumerations(): + enum.set_enumeration_id( + int(inp.enforce_ids.enumeration_ids[enum_id_idx]) + ) + enum_id_idx += 1 + if self.logger is not None: + self.logger.log( + "Enforced IDs for compounds and enumerations specified (merging disabled).", + _LE.DEBUG, + ) + return compounds diff --git a/icolos/core/step_utils/rdkit_utils.py b/icolos/core/step_utils/rdkit_utils.py new file mode 100644 index 0000000..1a93267 --- /dev/null +++ b/icolos/core/step_utils/rdkit_utils.py @@ -0,0 +1,10 @@ +from rdkit import Chem + + +def to_smiles(mol, isomericSmiles=False): + """ + Converts a Mol object into a canonical SMILES string. + :param mol: Mol object. + :return: A SMILES string. + """ + return Chem.MolToSmiles(mol, isomericSmiles=isomericSmiles) diff --git a/icolos/core/step_utils/retry.py b/icolos/core/step_utils/retry.py new file mode 100644 index 0000000..e4db30a --- /dev/null +++ b/icolos/core/step_utils/retry.py @@ -0,0 +1,42 @@ +import functools +import time +from typing import Any +from pydantic import BaseModel + + +class RetryResult(BaseModel): + success: bool + tries: int + result: Any = None + exception: Exception = None + + class Config: + arbitrary_types_allowed = True + + def __init__(self, **data): + super().__init__(**data) + + +# TODO: do a unit test for this +def retry(n_tries, retry_wait_seconds, allowed_exceptions=()): + if n_tries < 1: + n_tries = 1 + + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs) -> RetryResult: + exc = None + for idx in range(n_tries): + try: + result = func(*args, **kwargs) + return RetryResult( + success=True, tries=idx + 1, result=result, exception=None + ) + except allowed_exceptions as e: + exc = e + time.sleep(retry_wait_seconds) + return RetryResult(success=False, tries=n_tries, result=None, exception=exc) + + return wrapper + + return decorator diff --git a/icolos/core/step_utils/run_variables_resolver.py b/icolos/core/step_utils/run_variables_resolver.py new file mode 100644 index 0000000..3305f6a --- /dev/null +++ b/icolos/core/step_utils/run_variables_resolver.py @@ -0,0 +1,67 @@ +from copy import deepcopy + +from icolos.core.containers.compound import Enumeration, Compound, Conformer +from icolos.utils.enums.write_out_enums import RunVariablesEnum + +_RVE = RunVariablesEnum() + + +class RunVariablesResolver: + def __init__(self): + pass + + def _replace(self, input_str: str, pattern: str, replacement) -> str: + if replacement is not None: + pattern = _RVE.PREFIX + 
pattern + _RVE.POSTFIX + input_str = input_str.replace(pattern, str(replacement)) + return input_str + + def resolve_compound_level(self, input_str: str, comp: Compound) -> str: + comp = deepcopy(comp) + resolved_str = self._replace( + input_str, _RVE.COMPOUND_ID, comp.get_compound_number() + ) + resolved_str = self._replace(resolved_str, _RVE.COMPOUND_NAME, comp.get_name()) + return resolved_str + + def resolve_enumeration_level(self, input_str: str, enum: Enumeration) -> str: + enum = deepcopy(enum) + resolved_str = self._replace( + input_str, _RVE.ENUMERATION_ID, enum.get_enumeration_id() + ) + resolved_str = self._replace( + resolved_str, _RVE.ENUMERATION_STRING, enum.get_index_string() + ) + return resolved_str + + def resolve_conformer_level(self, input_str: str, conf: Conformer) -> str: + conf = deepcopy(conf) + resolved_str = self._replace( + input_str, _RVE.CONFORMER_ID, conf.get_conformer_id() + ) + resolved_str = self._replace( + resolved_str, _RVE.CONFORMER_STRING, conf.get_index_string() + ) + return resolved_str + + def resolve(self, input_str: str, input_object) -> str: + if not isinstance(input_str, str): + return input_str + + if isinstance(input_object, Conformer): + input_str = self.resolve_compound_level( + input_str, input_object.get_enumeration_object().get_compound_object() + ) + input_str = self.resolve_enumeration_level( + input_str, input_object.get_enumeration_object() + ) + return self.resolve_conformer_level(input_str, input_object) + elif isinstance(input_object, Enumeration): + input_str = self.resolve_compound_level( + input_str, input_object.get_compound_object() + ) + return self.resolve_enumeration_level(input_str, input_object) + elif isinstance(input_object, Compound): + return self.resolve_compound_level(input_str, input_object) + else: + raise ValueError(f'Object of type "{type(input_object)}" is not supported.') diff --git a/icolos/core/step_utils/sdconvert_util.py b/icolos/core/step_utils/sdconvert_util.py new file mode 100644 index 0000000..fde466d --- /dev/null +++ b/icolos/core/step_utils/sdconvert_util.py @@ -0,0 +1,68 @@ +from icolos.loggers.steplogger import StepLogger +from icolos.utils.execute_external.sdconvert import SDConvertExecutor + +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum +from icolos.utils.enums.logging_enums import LoggingConfigEnum +from icolos.utils.general.icolos_exceptions import StepFailed + +_SEE = SchrodingerExecutablesEnum() +_LE = LoggingConfigEnum() + + +class SDConvertUtil: + def __init__(self, prefix_execution: str = None, binary_location: str = None): + self._logger = StepLogger() + + # initialize and check executor + self.executor = SDConvertExecutor( + prefix_execution=prefix_execution, binary_location=binary_location + ) + if not self.executor.is_available(): + raise StepFailed("Cannot initialize sdconvert backend - abort.") + self._logger.log(f"Checked sdconvert availability - valid.", _LE.DEBUG) + + def execute(self, arguments: list): + execution_result = self.executor.execute( + command=_SEE.SDCONVERT, arguments=arguments, check=True + ) + if execution_result.returncode != 0: + self._logger.log( + f"Could not execute sdconvert (returncode != 0) with error: {execution_result.stderr}.", + _LE.ERROR, + ) + + def mae2sdf(self, mae_file: str, sdf_file: str): + arguments = [ + "".join([_SEE.SDCONVERT_I, _SEE.SDCONVERT_FORMAT_MAE]), + mae_file, + "".join([_SEE.SDCONVERT_O, _SEE.SDCONVERT_FORMAT_SD]), + sdf_file, + ] + self.execute(arguments=arguments) + + def sdf2mae(self, sdf_file: 
str, mae_file: str): + arguments = [ + "".join([_SEE.SDCONVERT_I, _SEE.SDCONVERT_FORMAT_SD]), + sdf_file, + "".join([_SEE.SDCONVERT_O, _SEE.SDCONVERT_FORMAT_MAE]), + mae_file, + ] + self.execute(arguments=arguments) + + def pdb2mae(self, pdb_file: str, mae_file: str): + arguments = [ + "".join([_SEE.SDCONVERT_I, _SEE.SDCONVERT_FORMAT_PDB]), + pdb_file, + "".join([_SEE.SDCONVERT_O, _SEE.SDCONVERT_FORMAT_MAE]), + mae_file, + ] + self.execute(arguments=arguments) + + def sdf2pdb(self, sdf_file: str, pdb_file: str): + arguments = [ + "".join([_SEE.SDCONVERT_I, _SEE.SDCONVERT_FORMAT_SD]), + sdf_file, + "".join([_SEE.SDCONVERT_O, _SEE.SDCONVERT_FORMAT_PDB]), + pdb_file, + ] + self.execute(arguments=arguments) diff --git a/icolos/core/step_utils/step_writeout.py b/icolos/core/step_utils/step_writeout.py new file mode 100644 index 0000000..5bd4eac --- /dev/null +++ b/icolos/core/step_utils/step_writeout.py @@ -0,0 +1,507 @@ +import os +from collections import OrderedDict +from copy import deepcopy + +import numpy as np +import pandas as pd +import json +from typing import List +from pydantic import BaseModel, PrivateAttr +from rdkit import Chem +from pathlib import Path + +from icolos.core.containers.compound import Compound, Conformer +from icolos.core.step_utils.input_preparator import StepData +from icolos.core.step_utils.run_variables_resolver import RunVariablesResolver +from icolos.loggers.steplogger import StepLogger +from icolos.utils.enums.logging_enums import LoggingConfigEnum +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.enums.write_out_enums import WriteOutEnum + +_WE = WriteOutEnum() +_LE = LoggingConfigEnum() +_SBE = StepBaseEnum + + +class StepWriteoutCompoundAggregationParameters(BaseModel): + mode: _SBE = _SBE.WRITEOUT_COMP_AGGREGATION_MODE_ALL + # Union[ + # _SBE.WRITEOUT_COMP_AGGREGATION_MODE_ALL, + # _SBE.WRITEOUT_COMP_AGGREGATION_MODE_BESTPERCOMPOUND, + # _SBE.WRITEOUT_COMP_AGGREGATION_MODE_BESTPERENUMERATION, + # ] = _SBE.WRITEOUT_COMP_AGGREGATION_MODE_ALL + highest_is_best: bool = True + key: str = None + + +class StepWriteoutCompoundParameters(BaseModel): + category: _SBE + # Union[ + # _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS, + # _SBE.WRITEOUT_COMP_CATEGORY_ENUMERATIONS, + # _SBE.WRITEOUT_COMP_CATEGORY_EXTRADATA, + # ] + aggregation: StepWriteoutCompoundAggregationParameters = ( + StepWriteoutCompoundAggregationParameters() + ) + key: str = None + selected_tags: List[str] = None + + +class StepWriteoutGenericParameters(BaseModel): + key: str + + +class StepWriteoutDestinationParameters(BaseModel): + resource: str = None + type: _SBE = _SBE.WRITEOUT_DESTINATION_TYPE_FILE + # Union[ + # _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + # _SBE.WRITEOUT_DESTINATION_TYPE_REINVENT, + # _SBE.WRITEOUT_DESTINATION_TYPE_STDERR, + # _SBE.WRITEOUT_DESTINATION_TYPE_STDOUT, + # _SBE.WRITEOUT_DESTINATION_TYPE_REST, + # ] + # Union[_SBE.FORMAT_SDF, _SBE.FORMAT_CSV, _SBE.FORMAT_TXT] + format: _SBE = _SBE.FORMAT_TXT + merge: bool = True + mode: _SBE = _SBE.WRITEOUT_DESTINATION_BASE_NAME + # Union[ + # _SBE.WRITEOUT_DESTINATION_AUTOMATIC, + # _SBE.WRITEOUT_DESTINATION_BASE_NAME, + # _SBE.WRITEOUT_DESTINATION_DIR, + # ] = _SBE.WRITEOUT_DESTINATION_BASE_NAME + + +class StepWriteoutParameters(BaseModel): + compounds: StepWriteoutCompoundParameters = None + generic: StepWriteoutGenericParameters = None + destination: StepWriteoutDestinationParameters = None + + +class WriteOutHandler(BaseModel): + + config: StepWriteoutParameters + data: StepData = None + + class Config: 
underscore_attrs_are_private = True + + _logger = PrivateAttr() + + def __init__(self, **data): + super().__init__(**data) + self._logger = StepLogger() + + def set_data(self, data: StepData): + self.data = deepcopy(data) + + def get_data(self) -> StepData: + return self.data + + def _handle_destination_type(self): + if self.config.destination.type.lower() in ( + _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + _SBE.WRITEOUT_DESTINATION_TYPE_REINVENT, + ): + return self.config.destination.resource + elif ( + self.config.destination.type.lower() == _SBE.WRITEOUT_DESTINATION_TYPE_REST + ): + raise ValueError("REST end-point destination type not supported yet.") + raise ValueError( + f"Destination type {self.config.destination.type} not supported." + ) + + def _write_compounds(self): + resource = self._handle_destination_type() + resolver = RunVariablesResolver() + if self.config.compounds.category == _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS: + if self.config.destination.format.upper() == _SBE.FORMAT_CSV: + # only CSV is supported as the write-out format for tabular data + self._writeout_tabular() + elif self.config.destination.format.upper() == _SBE.FORMAT_JSON: + self._writeout_reinvent() + elif self.config.destination.format.upper() == _SBE.FORMAT_SDF: + + def _write_compounds(compounds: List[Compound], resource: str): + # TODO: deal with resolving resources differently (also for writing enumerations below) + resource_resolved = resource + for compound in compounds: + for enum in compound.get_enumerations(): + if len(enum.get_conformers()) > 0: + resource_resolved = resolver.resolve(resource, enum[0]) + break + self._make_folder(resource_resolved) + + writer = Chem.SDWriter(resource_resolved) + written = 0 + + for comp in compounds: + for enum in comp: + for conf in enum: + molecule = conf.get_molecule() + if ( + comp.get_name() is not None + and comp.get_name() != "" + ): + molecule.SetProp(_WE.COMPOUND_NAME, comp.get_name()) + molecule.SetProp( + _WE.RDKIT_NAME, conf.get_index_string() + ) + molecule.SetProp( + _WE.INDEX_STRING, conf.get_index_string() + ) + writer.write(molecule) + written += 1 + writer.close() + self._logger.log( + f"Wrote {written} conformers to file {resource_resolved}.", + _LE.DEBUG, + ) + + # TODO: At the moment, this only splits at the compound level (taking the first conformer for resolving), + # add full generic support. + if self.config.destination.merge: + _write_compounds(self.data.compounds, resource=resource) + else: + for comp in self.data.compounds: + _write_compounds([comp], resource) + elif self.config.compounds.category == _SBE.WRITEOUT_COMP_CATEGORY_ENUMERATIONS: + if not self.config.destination.format.upper() == _SBE.FORMAT_SDF: + raise NotImplementedError( + "This write-out is not supported for enumerations." 
+ ) + else: + + def _write_compounds(compounds: List[Compound], resource: str): + # TODO: deal with resolving resources differently (also for writing conformers above) + resource_resolved = resource + for compound in compounds: + if len(compound.get_enumerations()) > 0: + resource_resolved = resolver.resolve(resource, compound) + break + + self._make_folder(resource_resolved) + writer = Chem.SDWriter(resource_resolved) + written = 0 + for comp in compounds: + for enum in comp: + molecule = enum.get_molecule() + if comp.get_name() is not None and comp.get_name() != "": + molecule.SetProp(_WE.COMPOUND_NAME, comp.get_name()) + molecule.SetProp(_WE.RDKIT_NAME, enum.get_index_string()) + molecule.SetProp(_WE.INDEX_STRING, enum.get_index_string()) + writer.write(molecule) + written += 1 + writer.close() + self._logger.log( + f"Wrote {written} enumeration molecules to file {resource_resolved}.", + _LE.DEBUG, + ) + + if self.config.destination.merge: + _write_compounds(self.data.compounds, resource=resource) + else: + for comp in self.data.compounds: + _write_compounds([comp], resource) + elif self.config.compounds.category == _SBE.WRITEOUT_COMP_CATEGORY_EXTRADATA: + if self.config.destination.format.upper() != _SBE.FORMAT_TXT: + raise ValueError( + f"For writing out extra-data (attached to conformers), only TXT is supported as format." + ) + # TODO: Does merging make any sense here? + for comp in self.data.compounds: + for enum in comp: + for conf in enum: + resource_resolved = resolver.resolve(resource, conf) + self._make_folder(resource_resolved) + with open(resource_resolved, "w") as f: + content = conf.get_extra_data()[self.config.compounds.key] + if isinstance(content, list): + for line in content: + f.write(line.rstrip("\n") + "\n") + elif isinstance(content, str): + f.write(content) + else: + raise ValueError( + "Extra data must be either a string or a list of strings." + ) + else: + raise ValueError(f"{self.config.compounds.category} not supported.") + + def _write_generic_data(self): + if ( + self.config.destination.type.lower() != _SBE.WRITEOUT_DESTINATION_TYPE_FILE + or self.config.destination.format.upper() != _SBE.FORMAT_TXT + ): + raise ValueError( + 'When writing out generic data, you must use type "file" and format "txt".' + ) + # resource should be a directory for writeout only, in most cases it should already exist + resource = self._handle_destination_type() + self._make_folder(resource) + if self.config.destination.mode == _SBE.WRITEOUT_DESTINATION_DIR: + # The output path should be a directory only + if not os.path.isdir(resource): + raise AssertionError( + "When specifying a directory, the writeout destination resource must not be a filepath!" + ) + # write out all files from that step with the required extension + for idx, file in enumerate( + self.data.generic.get_files_by_extension(self.config.generic.key) + ): + if self.config.destination.mode == _SBE.WRITEOUT_DESTINATION_BASE_NAME: + parts = resource.split(".") + # use a local name so that the index suffixes do not accumulate on `resource` across iterations + file_resource = parts[0] + f"_{idx}." + parts[1] + file.write(file_resource, join=False) + elif self.config.destination.mode == _SBE.WRITEOUT_DESTINATION_AUTOMATIC: + parts = file.get_file_name().split(".") + file_name = parts[0] + f"_{idx}." 
+ parts[1] + resource = os.path.join("/".join(resource.split("/")[:-1]), file_name) + file.write(resource, join=False) + elif self.config.destination.mode == _SBE.WRITEOUT_DESTINATION_DIR: + resource = resource + assert os.path.isdir(resource) + file.write(resource, join=True, final_writeout=True) + + def write(self): + if self.config.compounds is not None and self.config.generic is not None: + raise ValueError("Only specify either compounds or generic data, not both.") + + if self.config.compounds is not None: + self._write_compounds() + elif self.config.generic is not None: + self._write_generic_data() + else: + raise ValueError("Either compounds or generic data has to be specified.") + + def _writeout_reinvent(self): + def _get_conf_by_comp_name(confs: List[Conformer], comp_name: str) -> Conformer: + # assumes there is at most 1 conformer / compound left at this stage, as is required by REINVENT + for conf in confs: + if conf.get_compound_name() == comp_name: + return conf + return None + + dict_result = {_WE.JSON_RESULTS: []} + tags = self._get_selected_tags() + + # add names, including those for which no conformer has been obtained + dict_result[_WE.JSON_NAMES] = [comp.get_name() for comp in self.data.compounds] + + # do aggregation (might remove conformers) + confs_unrolled = self._apply_aggregation(self.data.compounds) + + # add values (derived from molecule tags) + # TODO: if no conformers are left, we need to write out an empty JSON that tells REINVENT that none worked + for tag in tags: + values = [] + for comp_name in dict_result[_WE.JSON_NAMES]: + conf = _get_conf_by_comp_name(confs=confs_unrolled, comp_name=comp_name) + if conf is not None: + try: + value = conf.get_molecule().GetProp(tag) + except KeyError: + value = _WE.JSON_NA + else: + value = _WE.JSON_NA + values.append(value.strip()) + dict_result[_WE.JSON_RESULTS].append( + {_WE.JSON_VALUES_KEY: tag, _WE.JSON_VALUES: values} + ) + + # TODO: refactor that part + resource = self._handle_destination_type() + if len(confs_unrolled) > 0: + resolver = RunVariablesResolver() + resource_resolved = resolver.resolve(resource, confs_unrolled[0]) + else: + resource_resolved = resource + self._logger.log( + f"No conformers obtained, write-out resource resolving disabled.", + _LE.WARNING, + ) + self._make_folder(resource_resolved) + + # write-out according to destination type + # TODO: there seems to be an issue here, when multiple write-out blocks are specified and no conformers are + # left: only the first block gets executed and if that's not the REINVENT one, the run will crash + if self.config.destination.type.lower() in ( + _SBE.WRITEOUT_DESTINATION_TYPE_REINVENT, + _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + ): + with open(resource_resolved, "w") as f: + json.dump(dict_result, f, indent=4) + elif self.config.destination.type.lower() in ( + _SBE.WRITEOUT_DESTINATION_TYPE_STDOUT, + _SBE.WRITEOUT_DESTINATION_TYPE_STDERR, + ): + json.dump(dict_result, resource_resolved, indent=4) + else: + raise ValueError( + f"Destination type {self.config.destination.type} not supported for this function." + ) + + def _get_selected_tags(self) -> List[str]: + # this function returns a list of tags (strings) that are to be considered for e.g. 
tabular write-out + # if the respective configuration field is set to "None", use all tags (over all compounds in a batch) + if self.config.compounds.selected_tags is not None: + if isinstance(self.config.compounds.selected_tags, list): + list_tags = self.config.compounds.selected_tags + elif isinstance(self.config.compounds.selected_tags, str): + list_tags = [self.config.compounds.selected_tags] + else: + raise ValueError( + f'Tag selection "{self.config.compounds.selected_tags}" set to illegal value.' + ) + else: + # get all tags for all compounds + list_tags = [] + for comp in self.data.compounds: + for enum in comp: + for conf in enum: + list_tags = list_tags + list(conf.get_molecule().GetPropNames()) + + list_tags = list(set(list_tags)) + return list_tags + + def _initialize_dict_csv( + self, keys: List[str], nrow: int, fill_value=np.NaN + ) -> OrderedDict: + return_dict = OrderedDict() + for key in keys: + return_dict[key] = [fill_value for _ in range(nrow)] + return return_dict + + def _apply_aggregation(self, compounds: List[Compound]) -> List[Conformer]: + if ( + self.config.compounds.aggregation.mode + == _SBE.WRITEOUT_COMP_AGGREGATION_MODE_ALL + ): + return self._unroll_conformers(compounds) + + confs_remaining = [] + if ( + self.config.compounds.aggregation.mode + == _SBE.WRITEOUT_COMP_AGGREGATION_MODE_BESTPERENUMERATION + ): + raise NotImplementedError("Best per enumeration is not yet implemented.") + elif ( + self.config.compounds.aggregation.mode + == _SBE.WRITEOUT_COMP_AGGREGATION_MODE_BESTPERCOMPOUND + ): + for comp in compounds: + unrolled_conformers = self._unroll_conformers([comp]) + if len(unrolled_conformers) == 0: + continue + values = [ + float( + conf.get_molecule().GetProp( + self.config.compounds.aggregation.key + ) + ) + for conf in unrolled_conformers + ] + index_best = ( + values.index(max(values)) + if self.config.compounds.aggregation.highest_is_best + else values.index(min(values)) + ) + confs_remaining.append(unrolled_conformers[index_best]) + return confs_remaining + + def _unroll_conformers(self, compounds: List[Compound]) -> List[Conformer]: + result = [] + for comp in compounds: + for enum in comp: + for conf in enum: + result.append(conf) + return result + + def _writeout_tabular(self): + # get all tags of the molecules that are to be considered + tags = self._get_selected_tags() + + # remove the compound_name and _Name, as they will be specifically added at the beginning + if _WE.COMPOUND_NAME in tags: + tags.remove(_WE.COMPOUND_NAME) + if _WE.RDKIT_NAME in tags: + tags.remove(_WE.RDKIT_NAME) + + # do aggregation (might remove conformers) + confs_unrolled = self._apply_aggregation(self.data.compounds) + + # initialize a dictionary with all tags as keys and filled with NA for every position + dict_result = self._initialize_dict_csv( + keys=[_WE.RDKIT_NAME, _WE.COMPOUND_NAME] + tags, nrow=len(confs_unrolled) + ) + + # resolve resource + # TODO: refactor that part + resource = self._handle_destination_type() + resolver = RunVariablesResolver() + if len(confs_unrolled) == 0: + raise ValueError("No conformers found.") + resource_resolved = resolver.resolve(resource, confs_unrolled[0]) + self._make_folder(resource_resolved) + + # populate the dictionary with the values (if present) + for irow in range(len(confs_unrolled)): + # add the internal Icolos identifier + conf = confs_unrolled[irow] + dict_result[_WE.RDKIT_NAME][irow] = conf.get_index_string() + + # add the compound name, if specified + name = conf.get_compound_name() + 
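# compounds without a name fall back to an empty string in the name column +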
dict_result[_WE.COMPOUND_NAME][irow] = "" if name is None else name + for tag in tags: + try: + value = conf.get_molecule().GetProp(tag).strip() + except KeyError: + value = np.nan + dict_result[tag][irow] = value + + # do the writeout (after sanitation) + df_result = pd.DataFrame.from_dict(dict_result) + df_result = self._sanitize_df_columns(df=df_result) + df_result.to_csv( + path_or_buf=resource_resolved, + sep=",", + na_rep="", + header=True, + index=False, + mode="w", + quoting=None, + ) + self._logger.log( + f"Wrote data frame with {len(confs_unrolled)} rows and {len(tags)} columns to file {resource_resolved}.", + _LE.DEBUG, + ) + + def _sanitize_df_columns(self, df: pd.DataFrame) -> pd.DataFrame: + cols_before = df.columns.to_list() + df.columns = ( + df.columns.str.strip() + .str.replace(" ", "_") + .str.replace("(", "") + .str.replace(")", "") + .str.replace("/", "_") + .str.replace("[", "") + .str.replace("]", "") + ) + for col_before, col_after in zip(cols_before, df.columns.to_list()): + if col_before != col_after: + self._logger.log( + f"Sanitized column name {col_before} to {col_after}.", _LE.WARNING + ) + return df + + def _make_folder(self, path): + if isinstance(path, str): + if not os.path.isdir(path): + path = os.path.dirname(path) + Path(path).mkdir(parents=True, exist_ok=True) diff --git a/icolos/core/step_utils/structcat_util.py b/icolos/core/step_utils/structcat_util.py new file mode 100644 index 0000000..5668756 --- /dev/null +++ b/icolos/core/step_utils/structcat_util.py @@ -0,0 +1,68 @@ +from typing import List + +from icolos.loggers.steplogger import StepLogger +from icolos.utils.execute_external.openbabel import OpenBabelExecutor +from icolos.utils.execute_external.structcat import StructcatExecutor + +from icolos.utils.enums.program_parameters import ( + OpenBabelEnum, + SchrodingerExecutablesEnum, +) +from icolos.utils.enums.logging_enums import LoggingConfigEnum +from icolos.utils.general.icolos_exceptions import StepFailed + +_SEE = SchrodingerExecutablesEnum() +_LE = LoggingConfigEnum() + +_OE = OpenBabelEnum() + + +class StructcatUtil: + def __init__( + self, + prefix_execution: str = None, + binary_location: str = None, + backend: str = _SEE.STRUCTCAT, + ): + self._logger = StepLogger() + self._backend = backend + # initialize and check executor + if self._backend == _SEE.STRUCTCAT: + self.executor = StructcatExecutor( + prefix_execution=prefix_execution, binary_location=binary_location + ) + elif self._backend == _OE.OBABEL: + self.executor = OpenBabelExecutor() + + if not self.executor.is_available(): + raise StepFailed("Cannot initialize structcat backend - abort.") + + def concatenate( + self, + input_files: List[str], + output_file: str, + location: str = None, + backend=_SEE.STRUCTCAT, + ): + if self._backend == _SEE.STRUCTCAT: + arguments = [] + for input_file in input_files: + arguments = arguments + [ + _SEE.STRUCTCAT_I, + input_file, + ] + arguments = arguments + [ + _SEE.STRUCTCAT_O, + output_file, + ] + self.executor.execute( + command=_SEE.STRUCTCAT, arguments=arguments, check=True + ) + + elif self._backend == _OE.OBABEL: + arguments = input_files + arguments.append("-O") + arguments.append(output_file) + self.executor.execute( + command=_OE.OBABEL, arguments=arguments, check=True, location=location + ) diff --git a/icolos/core/step_utils/structconvert.py b/icolos/core/step_utils/structconvert.py new file mode 100644 index 0000000..b7f0798 --- /dev/null +++ b/icolos/core/step_utils/structconvert.py @@ -0,0 +1,69 @@ +from 
icolos.loggers.steplogger import StepLogger +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum +from icolos.utils.execute_external.schrodinger import SchrodingerExecutor +from icolos.utils.general.icolos_exceptions import StepFailed +from icolos.utils.enums.logging_enums import LoggingConfigEnum + +_LE = LoggingConfigEnum() +_SEE = SchrodingerExecutablesEnum() + + +class StructConvert: + """ + Utility for converting structure files with Schrodinger's StructConvert + """ + + def __init__(self, prefix_execution: str, binary_location: str = None) -> None: + self._logger = StepLogger() + + self.executor = SchrodingerExecutor( + binary_location=binary_location, prefix_execution=prefix_execution + ) + if not self.executor.is_available(): + raise StepFailed("Cannot initialize structconvert backend - abort.") + self._logger.log(f"Checked structconvert availability - valid.", _LE.DEBUG) + + def execute(self, arguments: list): + execution_result = self.executor.execute( + command=_SEE.STRUCTCONVERT, arguments=arguments, check=True + ) + if execution_result.returncode != 0: + self._logger.log( + f"Could not execute structconvert (returncode != 0) with error: {execution_result.stderr}.", + _LE.ERROR, + ) + + def convert(self, input_file: str, output_file: str): + arguments = [ + input_file, + output_file, + ] + self.execute(arguments=arguments) + + def pdb2mae(self, pdb_file: str, mae_file: str): + # newer Schrodinger releases do not check this, so it needs to be done here + assert pdb_file.endswith(".pdb") + assert mae_file.endswith(".mae") + arguments = [ + pdb_file, + mae_file, + ] + self.execute(arguments=arguments) + + def sdf2pdb(self, sdf_file: str, pdb_file: str): + assert sdf_file.endswith(".sdf") + assert pdb_file.endswith(".pdb") + arguments = [ + sdf_file, + pdb_file, + ] + self.execute(arguments=arguments) + + def mae2pdb(self, mae_file: str, pdb_file: str): + assert mae_file.endswith(".mae") + assert pdb_file.endswith(".pdb") + arguments = [ + mae_file, + pdb_file, + ] + self.execute(arguments=arguments) diff --git a/icolos/core/steps_utils.py b/icolos/core/steps_utils.py new file mode 100644 index 0000000..ace576b --- /dev/null +++ b/icolos/core/steps_utils.py @@ -0,0 +1,22 @@ +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.general.convenience_functions import nested_get +from icolos.utils.enums.step_initialization_enum import StepInitializationEnum +from icolos.utils.enums.flow_control_enums import FlowControlInitializationEnum + +_IE = StepInitializationEnum() +_FCE = FlowControlInitializationEnum() + + +def initialize_step_from_dict(step_conf: dict) -> StepBase: + _STE = StepBaseEnum + step_type = nested_get(step_conf, _STE.STEP_TYPE, default=None) + step_type = None if step_type is None else step_type.upper() + if step_type in _IE.STEP_INIT_DICT.keys(): + return _IE.STEP_INIT_DICT[step_type](**step_conf) + elif step_type in _FCE.FLOW_CONTROL_INIT_DICT.keys(): + return _FCE.FLOW_CONTROL_INIT_DICT[step_type](**step_conf) + else: + raise ValueError( + f"Backend for step {nested_get(step_conf, _STE.STEPID, '')} unknown." 
+ ) diff --git a/icolos/core/workflow_steps/__init__.py b/icolos/core/workflow_steps/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/autodockvina/__init__.py b/icolos/core/workflow_steps/autodockvina/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/autodockvina/docking.py b/icolos/core/workflow_steps/autodockvina/docking.py new file mode 100644 index 0000000..0d8df83 --- /dev/null +++ b/icolos/core/workflow_steps/autodockvina/docking.py @@ -0,0 +1,324 @@ +import os +import shutil +import tempfile +from typing import List, Tuple + +from pydantic import BaseModel, Field +from rdkit import Chem +from copy import deepcopy + +from icolos.utils.enums.step_enums import StepAutoDockVinaEnum, StepBaseEnum +from icolos.utils.execute_external.autodockvina import AutoDockVinaExecutor +from icolos.utils.execute_external.openbabel import OpenBabelExecutor +from icolos.utils.general.icolos_exceptions import StepFailed +from icolos.utils.general.files_paths import gen_tmp_file +from icolos.core.containers.compound import Conformer +from icolos.utils.enums.program_parameters import AutoDockVinaEnum, OpenBabelEnum +from icolos.core.workflow_steps.step import _LE, StepBase +from icolos.utils.general.parallelization import Subtask, SubtaskContainer, Parallelizer + +_SBE = StepBaseEnum +_ADE = AutoDockVinaEnum() +_OBE = OpenBabelEnum() +_SAE = StepAutoDockVinaEnum() + + +class ADVSearchSpace(BaseModel): + center_x: float = Field(alias="--center_x", default=None) + center_y: float = Field(alias="--center_y", default=None) + center_z: float = Field(alias="--center_z", default=None) + size_x: float = Field(alias="--size_x", default=15.0) + size_y: float = Field(alias="--size_y", default=15.0) + size_z: float = Field(alias="--size_z", default=15.0) + + +class ADVConfiguration(BaseModel): + seed: int = 42 + number_poses: int = 1 + search_space: ADVSearchSpace = ADVSearchSpace() + receptor_path: str = None + + +class ADVAdditional(BaseModel): + configuration: ADVConfiguration = ADVConfiguration() + grid_ids: List[str] = ["grid0"] + + +class StepAutoDockVina(StepBase, BaseModel): + + _openbabel_executor: OpenBabelExecutor = None + adv_additional: ADVAdditional = None + + def __init__(self, **data): + super().__init__(**data) + + # initialize the executor and test availability + self._initialize_backend(executor=AutoDockVinaExecutor) + self._check_backend_availability() + + # initialize the executor for all "OpenBabel" + self._openbabel_executor = OpenBabelExecutor() + if not self._openbabel_executor.is_available(): + raise StepFailed( + "AutoDock Vina requires OpenBabel execution, initialization failed." 
+ ) + + # set ADV specific settings and ensure that each molecule gets its own sublist + self.adv_additional = ADVAdditional(**self.settings.additional) + self.execution.parallelization.max_length_sublists = 1 + + def _set_docking_score(self, conformer: Chem.Mol) -> bool: + try: + result_tag_lines = conformer.GetProp(_ADE.REMARK_TAG).split("\n") + result_line = [ + line for line in result_tag_lines if _ADE.RESULT_LINE_IDENTIFIER in line + ][0] + parts = result_line.split() + docking_score = parts[_ADE.RESULT_LINE_POS_SCORE] + except KeyError: + return False + conformer.SetProp(_SBE.ANNOTATION_TAG_DOCKING_SCORE, str(docking_score)) + return True + + def _write_molecule_to_pdbqt(self, path: str, molecule: Chem.Mol) -> bool: + # generate temporary copy as PDB + _, tmp_pdb = gen_tmp_file(suffix=".pdb", dir=os.path.dirname(path)) + Chem.MolToPDBFile(molecule, filename=tmp_pdb) + + # translate the pdb into a pdbqt including partial charges + # Note: In contrast to the target preparation, + # we will use a tree-based flexibility treatment here - + # thus, the option "-xr" is NOT used. + arguments = [ + tmp_pdb, + _OBE.OBABEL_OUTPUT_FORMAT_PDBQT, + "".join([_OBE.OBABEL_O, path]), + _OBE.OBABEL_PARTIALCHARGE, + _OBE.OBABEL_PARTIALCHARGE_GASTEIGER, + ] + self._openbabel_executor.execute( + command=_OBE.OBABEL, arguments=arguments, check=False + ) + + if os.path.exists(path): + return True + else: + return False + + def _generate_temporary_input_output_files( + self, batch: List[List[Subtask]] + ) -> Tuple[List[str], List[str], List[str], List[str]]: + tmp_output_dirs = [] + tmp_input_paths = [] + tmp_output_paths = [] + enumeration_ids = [] + + for next_subtask_list in batch: + # for "AutoDock Vina", only single molecules can be handled so every sublist is + # guaranteed at this stage to have only one element + if len(next_subtask_list) > 1: + self._logger.log( + f"Subtask list length for ADV is > 1 ({len(next_subtask_list)}), only the first element will be processed.", + _LE.WARNING, + ) + subtask = next_subtask_list[0] + + # generate temporary input files and output directory + cur_tmp_output_dir = tempfile.mkdtemp() + _, cur_tmp_input_pdbqt = gen_tmp_file( + suffix=".pdbqt", dir=cur_tmp_output_dir + ) + _, cur_tmp_output_sdf = gen_tmp_file(suffix=".sdf", dir=cur_tmp_output_dir) + + # try to write the enumeration molecules out as PDBQT files + enumeration = subtask.data + mol = deepcopy(enumeration.get_molecule()) + if mol is None: + shutil.rmtree(cur_tmp_output_dir) + self._logger.log( + f"Enumeration {enumeration.get_index_string()} did not hold a valid RDkit molecule - skipped.", + _LE.DEBUG, + ) + continue + if not self._write_molecule_to_pdbqt(cur_tmp_input_pdbqt, mol): + self._logger.log( + f"Could not generate PDBQT intermediate file from enumeration {enumeration.get_index_string()} - skipped.", + _LE.DEBUG, + ) + continue + + # also store all the paths in case it succeeded -> these will be used later, failures will be ignored + tmp_output_dirs.append(cur_tmp_output_dir) + tmp_input_paths.append(cur_tmp_input_pdbqt) + tmp_output_paths.append(cur_tmp_output_sdf) + enumeration_ids.append(enumeration.get_index_string()) + + return tmp_output_dirs, tmp_input_paths, tmp_output_paths, enumeration_ids + + def _execute_autodockvina(self): + # get number of sublists in batch and initialize Parallelizer + adv_parallelizer = Parallelizer(func=self._run_subjob) + + # continue until everything is successfully done or number of retries have been exceeded + while self._subtask_container.done() is False: + 
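+            # NOTE (added commentary, inferred from how SubtaskContainer is used here):
+            # every pass drains at most one sublist per available core; all drawn
+            # subtasks are pre-marked as "failed" and only flipped back to "success"
+            # once their output has been parsed, so crashed dockings are re-queued
+            # until the failure policy's "n_tries" is exhausted.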
next_batch = self._get_sublists(get_first_n_lists=self._get_number_cores()) + + # generate paths and initialize molecules (so that if they fail, this can be covered) + ( + tmp_output_dirs, + tmp_input_paths, + tmp_output_paths, + enumeration_ids, + ) = self._generate_temporary_input_output_files(next_batch) + + # execute the current batch in parallel; hand over lists of parameters (will be handled by Parallelizer) + # also increment the tries and set the status to "failed" (don't do that inside subprocess, as data is + # copied, not shared!) + _ = [sub.increment_tries() for element in next_batch for sub in element] + _ = [sub.set_status_failed() for element in next_batch for sub in element] + adv_parallelizer.execute_parallel( + input_path_pdbqt=tmp_input_paths, output_path_sdf=tmp_output_paths + ) + + # parse the output of that particular batch and remove temporary files + self._parse_adv_output_batch( + tmp_input_paths=tmp_input_paths, + tmp_output_paths=tmp_output_paths, + enumeration_ids=enumeration_ids, + next_batch=next_batch, + ) + + # clean-up + self._remove_temporary(tmp_output_dirs) + + # print the progress for this execution + self._log_execution_progress() + + def _parse_adv_output_batch( + self, + tmp_input_paths: List[str], + tmp_output_paths: List[str], + enumeration_ids: List[str], + next_batch: List[List[Subtask]], + ): + + for i in range(len(next_batch)): + subtask = next_batch[i][0] + tmp_output_path = tmp_output_paths[i] + tmp_input_path = tmp_input_paths[i] + enumeration_id = enumeration_ids[i] + grid_id = self.adv_additional.grid_ids[0] + grid_path = self.adv_additional.configuration.receptor_path + + # this is a protection against the case where empty (file size == 0 bytes) files are generated due to + # a failure during docking + if ( + not os.path.isfile(tmp_output_path) + or os.path.getsize(tmp_output_path) == 0 + ): + continue + + mol_supplier = Chem.SDMolSupplier(tmp_output_path, removeHs=False) + for mol in mol_supplier: + if mol is None: + continue + cur_enumeration_name = str(mol.GetProp("_Name")) + + # add the information on the actual grid used + mol.SetProp(_SBE.ANNOTATION_GRID_ID, str(grid_id)) + mol.SetProp(_SBE.ANNOTATION_GRID_PATH, str(grid_path)) + mol.SetProp(_SBE.ANNOTATION_GRID_FILENAME, os.path.basename(grid_path)) + + # if no docking score is attached (i.e. 
the molecule is a receptor or so, skip it) + if self._set_docking_score(mol) is not True: + continue + + # add molecule to the appropriate ligand + for compound in self.get_compounds(): + for enumeration in compound: + if enumeration.get_index_string() == enumeration_id: + new_conformer = Conformer( + conformer=mol, + conformer_id=None, + enumeration_object=enumeration, + ) + enumeration.add_conformer(new_conformer, auto_update=True) + subtask.set_status_success() + break + + def _delay_file_system(self, path) -> bool: + return self._wait_until_file_generation( + path=path, interval_sec=2, maximum_sec=10 + ) + + def _run_subjob(self, input_path_pdbqt: str, output_path_sdf: str): + + config = self.adv_additional.configuration + + # set up arguments list and execute + _, tmp_pdbqt_docked = gen_tmp_file( + suffix=".pdbqt", dir=os.path.dirname(input_path_pdbqt) + ) + arguments = [ + _ADE.VINA_RECEPTOR, + config.receptor_path, + _ADE.VINA_LIGAND, + input_path_pdbqt, + _ADE.VINA_CPU, + str(1), + _ADE.VINA_SEED, + config.seed, + _ADE.VINA_OUT, + tmp_pdbqt_docked, + _ADE.VINA_CENTER_X, + str(config.search_space.center_x), + _ADE.VINA_CENTER_Y, + str(config.search_space.center_y), + _ADE.VINA_CENTER_Z, + str(config.search_space.center_z), + _ADE.VINA_SIZE_X, + str(config.search_space.size_x), + _ADE.VINA_SIZE_Y, + str(config.search_space.size_y), + _ADE.VINA_SIZE_Z, + str(config.search_space.size_z), + _ADE.VINA_NUM_MODES, + config.number_poses, + ] + + execution_result = self._backend_executor.execute( + command=_ADE.VINA, arguments=arguments, check=True + ) + self._delay_file_system(path=tmp_pdbqt_docked) + + # translate the parsed output PDBQT into an SDF + arguments = [ + tmp_pdbqt_docked, + _OBE.OBABEL_INPUTFORMAT_PDBQT, + _OBE.OBABEL_OUTPUT_FORMAT_SDF, + "".join([_OBE.OBABEL_O, output_path_sdf]), + ] + self._openbabel_executor.execute( + command=_OBE.OBABEL, arguments=arguments, check=False + ) + self._delay_file_system(path=output_path_sdf) + + def execute(self): + # Note: This step only supports one grid at a time, ensemble docking is taken care of at the workflow level! 
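+        # For orientation, _run_subjob above assembles a call roughly equivalent to
+        # the following command line (a sketch - the exact flag spellings come from
+        # AutoDockVinaEnum; angle brackets mark per-job placeholders):
+        #   vina --receptor <receptor.pdbqt> --ligand <input.pdbqt> --cpu 1 --seed 42 \
+        #        --out <docked.pdbqt> --center_x <x> --center_y <y> --center_z <z> \
+        #        --size_x 15.0 --size_y 15.0 --size_z 15.0 --num_modes <number_poses>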
+ + # in order to be able to efficiently execute ADV on the enumeration level, all of them have to be unrolled + # Note: As they retain their respective Compound object, the attribution later on is simple + all_enumerations = [] + for compound in self.get_compounds(): + all_enumerations = all_enumerations + compound.get_enumerations() + for enumeration in compound: + enumeration.clear_conformers() + + # split into sublists, according to the settings + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(all_enumerations) + + # execute ADV + self._execute_autodockvina() diff --git a/icolos/core/workflow_steps/autodockvina/target_preparation.py b/icolos/core/workflow_steps/autodockvina/target_preparation.py new file mode 100644 index 0000000..84f3ab9 --- /dev/null +++ b/icolos/core/workflow_steps/autodockvina/target_preparation.py @@ -0,0 +1,137 @@ +from pydantic import BaseModel +from rdkit import Chem + +from icolos.utils.enums.program_parameters import OpenBabelEnum +from icolos.utils.enums.step_enums import StepAutoDockVinaTargetPreparationEnum +from icolos.utils.execute_external.autodockvina import AutoDockVinaExecutor +from icolos.utils.execute_external.openbabel import OpenBabelExecutor +from icolos.utils.general.icolos_exceptions import StepFailed + +from icolos.core.workflow_steps.step import _LE, StepBase + +_STE = StepAutoDockVinaTargetPreparationEnum() +_OBE = OpenBabelEnum() + + +class ADVExtractBoxTP(BaseModel): + reference_ligand_path: str = None + reference_ligand_format: str = _STE.EXTRACT_BOX_REFERENCE_LIGAND_FORMAT_PDB + + +class ADVAdditionalTP(BaseModel): + pH: float = ( + 7.4 # set target pH value that determines the protein's side-chain states + ) + input_receptor_pdb: str = None + output_receptor_pdbqt: str = None + extract_box: ADVExtractBoxTP = ADVExtractBoxTP() + + +class StepAutoDockVinaTargetPreparation(StepBase, BaseModel): + _openbabel_executor: OpenBabelExecutor = None + adv_additional: ADVAdditionalTP = None + + def __init__(self, **data): + super().__init__(**data) + + # initialize the executor and test availability + self._initialize_backend(executor=AutoDockVinaExecutor) + self._check_backend_availability() + + # initialize the executor for all "OpenBabel" + self._openbabel_executor = OpenBabelExecutor() + if not self._openbabel_executor.is_available(): + raise StepFailed( + "AutoDock Vina requires OpenBabel execution, initialization failed." + ) + + # set ADV specific settings and ensure that each molecule gets its own sublist + self.adv_additional = ADVAdditionalTP(**self.settings.additional) + + def _export_as_pdb2pdbqt(self): + # Note: In contrast to the ligand preparation, we will not use a tree-based flexibility treatment here - thus, + # the option "-xr" is used. Partial charges of the receptor are not used in AutoDock Vina. 
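+        # The argument list below corresponds roughly to this invocation (a sketch,
+        # assuming the usual OpenBabel flag spellings behind the enum members):
+        #   obabel -ipdb <receptor.pdb> -opdbqt -O<receptor.pdbqt> -xr -p 7.4 \
+        #          --partialcharge gasteiger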
+        arguments = [ +            " ".join( +                [_OBE.OBABEL_INPUTFORMAT_PDB, self.adv_additional.input_receptor_pdb] +            ), +            _OBE.OBABEL_OUTPUT_FORMAT_PDBQT, +            " ".join([_OBE.OBABEL_O, self.adv_additional.output_receptor_pdbqt]), +            "".join([_OBE.OBABEL_X, _OBE.OBABEL_X_R]), +            _OBE.OBABEL_P, +            str(self.adv_additional.pH), +            _OBE.OBABEL_PARTIALCHARGE, +            _OBE.OBABEL_PARTIALCHARGE_GASTEIGER, +        ] +        self._openbabel_executor.execute( +            command=_OBE.OBABEL, arguments=arguments, check=True +        ) +        self._logger.log( +            f"Exported target as PDBQT file {self.adv_additional.output_receptor_pdbqt}.", +            _LE.INFO, +        ) + +    def _log_extract_box(self): +        x_coords, y_coords, z_coords = self._extract_box() +        if x_coords is not None: + +            def dig(value): +                return round(value, ndigits=2) + +            self._logger.log( +                "Calculating ligand dimensions for AutoDock Vina docking protocol.", +                _LE.INFO, +            ) +            self._logger.log( +                f"Ligand ({self.adv_additional.extract_box.reference_ligand_path}):", +                _LE.INFO, +            ) +            self._logger_blank.log( +                f"X coordinates: min={dig(min(x_coords))}, max={dig(max(x_coords))}, mean={dig(sum(x_coords) / len(x_coords))}", +                _LE.INFO, +            ) +            self._logger_blank.log( +                f"Y coordinates: min={dig(min(y_coords))}, max={dig(max(y_coords))}, mean={dig(sum(y_coords) / len(y_coords))}", +                _LE.INFO, +            ) +            self._logger_blank.log( +                f"Z coordinates: min={dig(min(z_coords))}, max={dig(max(z_coords))}, mean={dig(sum(z_coords) / len(z_coords))}", +                _LE.INFO, +            ) + +    def _extract_box(self): +        # extracts box suggestions from a reference ligand, which can be added to an AutoDock Vina run +        # load the reference file (PDB or SDF) +        ref_format = self.adv_additional.extract_box.reference_ligand_format.upper() +        if ref_format == _STE.EXTRACT_BOX_REFERENCE_LIGAND_FORMAT_PDB: +            ref_mol = Chem.MolFromPDBFile( +                self.adv_additional.extract_box.reference_ligand_path, sanitize=True +            ) +        elif ref_format == _STE.EXTRACT_BOX_REFERENCE_LIGAND_FORMAT_SDF: +            mol_supplier = Chem.SDMolSupplier( +                self.adv_additional.extract_box.reference_ligand_path +            ) +            for mol in mol_supplier: +                if mol is None: +                    raise StepFailed( +                        f"Could not load molecule from {self.adv_additional.extract_box.reference_ligand_path} - abort." +                    ) +                ref_mol = mol +                break +        else: +            raise StepFailed( +                f"Reference ligand format {ref_format} not supported, use PDB or SDF instead - abort."
+ ) + + # extract coordinates + x_coords = [atom[0] for atom in ref_mol.GetConformer(0).GetPositions()] + y_coords = [atom[1] for atom in ref_mol.GetConformer(0).GetPositions()] + z_coords = [atom[2] for atom in ref_mol.GetConformer(0).GetPositions()] + return x_coords, y_coords, z_coords + + def execute(self): + # translate input PDB file into output PDBQT file + self._export_as_pdb2pdbqt() + + # extract and log the "box" dimensions based on the reference ligand + self._log_extract_box() diff --git a/icolos/core/workflow_steps/calculation/__init__.py b/icolos/core/workflow_steps/calculation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/calculation/base.py b/icolos/core/workflow_steps/calculation/base.py new file mode 100644 index 0000000..0e99f61 --- /dev/null +++ b/icolos/core/workflow_steps/calculation/base.py @@ -0,0 +1,52 @@ +import numpy as np +import pandas as pd + +from pydantic import BaseModel +from rdkit.Chem import AllChem +from typing import List + +from icolos.core.containers.compound import Conformer + +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.step_enums import StepRMSFilterEnum + +_SRF = StepRMSFilterEnum() + + +class StepCalculationBase(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + def _get_rms_method(self): + # there are two modes for the execution: "best" is better, but sometimes has performance issues + # for larger molecules + if self.settings.additional[_SRF.METHOD] == _SRF.METHOD_ALIGNMOL: + return AllChem.AlignMol + elif self.settings.additional[_SRF.METHOD] == _SRF.METHOD_BEST: + return AllChem.GetBestRMS + else: + raise ValueError( + f"RMS mode {self.settings.arguments.parameters[_SRF.METHOD]} not supported (either {_SRF.METHOD_ALIGNMOL} or {_SRF.METHOD_BEST})." 
+ ) + + @staticmethod + def _get_property_values(conformers: List[Conformer], prop: str) -> List[float]: + return [float(conf.get_molecule().GetProp(prop)) for conf in conformers] + + @staticmethod + def _calculate_rms_matrix( + conformers: List[Conformer], rms_method, decimals=3 + ) -> pd.DataFrame: + n_conf = len(conformers) + df_rms = pd.DataFrame(np.nan, index=range(n_conf), columns=range(n_conf)) + np.fill_diagonal(df_rms.values, 0) + + for i in range(n_conf - 1): + for j in range(i + 1, n_conf): + df_rms.iloc[i, j] = df_rms.iloc[j, i] = np.round( + rms_method( + conformers[i].get_molecule(), conformers[j].get_molecule() + ), + decimals=decimals, + ) + return df_rms diff --git a/icolos/core/workflow_steps/calculation/boltzmann_weighting.py b/icolos/core/workflow_steps/calculation/boltzmann_weighting.py new file mode 100644 index 0000000..581c0cd --- /dev/null +++ b/icolos/core/workflow_steps/calculation/boltzmann_weighting.py @@ -0,0 +1,98 @@ +from copy import deepcopy + +import numpy as np +from typing import List + +from pydantic import BaseModel + +from icolos.core.containers.compound import Enumeration, Conformer + +from icolos.utils.enums.step_enums import StepBoltzmannWeightingEnum +from icolos.core.workflow_steps.calculation.base import StepCalculationBase + +from icolos.utils.general.convenience_functions import * +from icolos.utils.constants import * + +_SBWE = StepBoltzmannWeightingEnum() + + +class StepBoltzmannWeighting(StepCalculationBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + def _get_relative_energy_values( + self, conformers: List[Conformer], property_name: str + ) -> List[float]: + values = [float(c.get_molecule().GetProp(property_name)) for c in conformers] + min_val = min(values) + relative_values = [value - min_val for value in values] + return relative_values + + def _obtain_factors(self, relative_values: List[float]) -> List[float]: + # calculate individual Boltzmann factors + individual_factors = [ + np.exp((-1 * val / (CONSTANT_KB * CONSTANT_T))) for val in relative_values + ] + + # calculate and return Boltzmann factors + sum_factors = sum(individual_factors) + factors = [val / sum_factors for val in individual_factors] + return factors + + def _calculate_Boltzmann_factors( + self, enumeration: Enumeration, parameters: dict + ) -> List[str]: + list_properties = parameters[_SBWE.PROPERTIES] + list_output_names = [] + for prop in list_properties: + # (1) get the relative values for this property (e.g. 
solvent) for all conformers in respect to the one + # with the minimal energy + relative_prop_values = self._get_relative_energy_values( + conformers=enumeration.get_conformers(), + property_name=prop[_SBWE.PROPERTIES_INPUT], + ) + + # (2) calculate the Boltzmann factors for this property + boltzmann_factors = self._obtain_factors( + relative_values=relative_prop_values + ) + + # (3) add the Boltzmann factors to the conformers as a tag + for c, bm_factor in zip(enumeration.get_conformers(), boltzmann_factors): + c.get_molecule().SetProp(prop[_SBWE.PROPERTIES_OUTPUT], str(bm_factor)) + list_output_names.append(prop[_SBWE.PROPERTIES_OUTPUT]) + return list_output_names + + def _do_Boltzmann_weighting(self, conformers: List[Conformer], weightings: dict): + input_tags = weightings[_SBWE.WEIGHT_INPUT] + output_prefix = nested_get( + weightings, _SBWE.WEIGHT_OUTPUT_PREFIX, default="bf_weighted" + ) + properties = weightings[_SBWE.WEIGHT_PROPERTIES] + for prop in properties: + for inp_tag in input_tags: + new_tag_name = "_".join([output_prefix, inp_tag, prop]) + products = [] + for conformer in conformers: + conf = conformer.get_molecule() + products.append( + float(conf.GetProp(prop)) * float(conf.GetProp(inp_tag)) + ) + for conformer in conformers: + conformer.get_molecule().SetProp(new_tag_name, str(sum(products))) + + def execute(self): + parameters = deepcopy(self.settings.arguments.parameters) + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if self._input_object_empty(enumeration): + continue + + # get the name of the Boltzmann properties / solvents and annotate the factors + _ = self._calculate_Boltzmann_factors(enumeration, parameters) + + # for each property and each weighting, add the respective tags + self._do_Boltzmann_weighting( + conformers=enumeration.get_conformers(), + weightings=parameters[_SBWE.WEIGHT], + ) diff --git a/icolos/core/workflow_steps/calculation/clustering.py b/icolos/core/workflow_steps/calculation/clustering.py new file mode 100644 index 0000000..ae0f99e --- /dev/null +++ b/icolos/core/workflow_steps/calculation/clustering.py @@ -0,0 +1,140 @@ +import pandas as pd +from typing import List, Tuple + +from pydantic import BaseModel + +from icolos.core.containers.compound import Conformer + +from icolos.utils.enums.step_enums import StepClusteringEnum +from icolos.core.workflow_steps.step import _LE +from icolos.core.workflow_steps.calculation.base import StepCalculationBase + +from sklearn.cluster import KMeans + +_SC = StepClusteringEnum() + + +class StepClustering(StepCalculationBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + # extend parameters + if _SC.N_CLUSTERS not in self.settings.arguments.parameters.keys(): + self.settings.arguments.parameters[_SC.N_CLUSTERS] = 3 + if _SC.MAX_ITER not in self.settings.arguments.parameters.keys(): + self.settings.arguments.parameters[_SC.MAX_ITER] = 300 + if _SC.TOP_N_PER_SOLVENT not in self.settings.additional.keys(): + self.settings.additional[_SC.TOP_N_PER_SOLVENT] = 3 + + def _get_nclusters_and_top_n(self, len_conformers: int) -> Tuple[int, int]: + n_clusters = self.settings.arguments.parameters[_SC.N_CLUSTERS] + if n_clusters > len_conformers: + n_clusters = len_conformers + self._logger.log( + f"Set number of clusters to {n_clusters} because not enough observations were provided.", + _LE.DEBUG, + ) + top_n_per_solvent = self.settings.additional[_SC.TOP_N_PER_SOLVENT] + if top_n_per_solvent > len_conformers: + top_n_per_solvent = len_conformers + 
self._logger.log( + f'Set number of "top_N_per_solvent" to {top_n_per_solvent} because not enough observations were provided.', + _LE.DEBUG, + ) + return n_clusters, top_n_per_solvent + + def _generate_feature_dataframe(self, conformers: List[Conformer]) -> pd.DataFrame: + features = self.settings.additional[_SC.FEATURES] + df_features = pd.DataFrame(columns=features) + for conf in conformers: + new_row = {} + for feature in features: + new_row[feature] = float(conf.get_molecule().GetProp(feature)) + df_features = df_features.append(new_row, ignore_index=True) + return df_features + + def _get_representative_conformers( + self, cluster_set: List[Tuple[int, Conformer]] + ) -> List[int]: + # for each selection (e.g. solvent), obtain the N top conformers (note, that the input is already clustered) + # also get rid of duplicates in the indices + rep_indices = [] + for solvent_key in self.settings.additional[_SC.FREE_ENERGY_SOLVENT_TAGS]: + conf_indices = [tuple_conf[0] for tuple_conf in cluster_set] + solvent_dGs = [ + float(tuple_conf[1].get_molecule().GetProp(solvent_key)) + for tuple_conf in cluster_set + ] + + # sort list of global indices for this cluster according to their free energy for this solvent + # note: from lowest (most negative) -> highest + conf_indices_sorted = [ + idx for _, idx in sorted(zip(solvent_dGs, conf_indices)) + ] + rep_indices = ( + rep_indices + + conf_indices_sorted[ + 0 : min( + len(conf_indices), + self.settings.additional[_SC.TOP_N_PER_SOLVENT], + ) + ] + ) + return list(set(rep_indices)) + + def _cluster_conformers(self, conformers: List[Conformer]) -> List[Conformer]: + # make sure the number of clusters specified and "N top per solvent" are not higher than the compound number + n_clusters, top_n_per_solvent = self._get_nclusters_and_top_n( + len_conformers=len(conformers) + ) + + # initialize K-means instance + kmeans = KMeans( + n_clusters=n_clusters, + max_iter=self.settings.arguments.parameters[_SC.MAX_ITER], + init="k-means++", + n_init=10, + tol=1e-04, + random_state=0, + ) + + # generate dataframe with selected properties + df_features = self._generate_feature_dataframe(conformers=conformers) + + # predict cluster and assign to conformer + cluster_labels = kmeans.fit_predict(df_features) + keep_indices = [] + for cluster_label in range(n_clusters): + # keep the "global" index to select the appropriate conformers later + cluster_set = [ + (i, conformers[i]) + for i in range(len(conformers)) + if cluster_labels[i] == cluster_label + ] + keep_indices = keep_indices + self._get_representative_conformers( + cluster_set=cluster_set + ) + return [conformers[i] for i in range(len(conformers)) if i in keep_indices] + + def execute(self): + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if len(enumeration.get_conformers()) == 0: + continue + + number_conformers_before = len(enumeration) + + # cluster conformers on the enumeration level + clustered_conformers = self._cluster_conformers( + conformers=enumeration.get_conformers() + ) + + # add clustered conformers to enumeration + enumeration.clear_conformers() + for conf in clustered_conformers: + enumeration.add_conformer(conformer=conf, auto_update=True) + number_conformers_after = len(enumeration) + self._logger.log( + f"Clustered {number_conformers_before} into {number_conformers_after} conformers for enumeration {enumeration.get_index_string()}.", + _LE.INFO, + ) diff --git a/icolos/core/workflow_steps/calculation/cosmo.py 
b/icolos/core/workflow_steps/calculation/cosmo.py new file mode 100644 index 0000000..2fa112a --- /dev/null +++ b/icolos/core/workflow_steps/calculation/cosmo.py @@ -0,0 +1,311 @@ +import os +import tempfile +from typing import Tuple, List +from copy import deepcopy + +from pydantic import BaseModel + +from icolos.utils.execute_external.turbomole import TurbomoleExecutor + +from icolos.core.containers.compound import Conformer, Enumeration + +from icolos.utils.enums.program_parameters import TurbomoleEnum +from icolos.utils.enums.program_parameters import CosmoOutputEnum +from icolos.utils.enums.compound_enums import ConformerContainerEnum +from icolos.core.workflow_steps.calculation.base import StepCalculationBase +from icolos.core.workflow_steps.step import _LE +from icolos.loggers.logger_utils import log_multiline_string +from icolos.utils.general.files_paths import attach_root_path + +_EE = TurbomoleEnum() +_CTE = ConformerContainerEnum() +_COE = CosmoOutputEnum() + + +class StepCosmo(StepCalculationBase, BaseModel): +    """Step that executes Cosmo. + +    Note, that the execution (especially in conjunction with a preceding turbomole step) is relatively complex. +    (1) Take the coord file from the additional data attached to the conformers, +    (2) run Cosmo, +    (3) extract the final XYZ snapshot with t2x, +    (4) translate it to an SDF file with obabel and +    (5) combine the new coordinates with the tags.""" + +    def __init__(self, **data): +        super().__init__(**data) + +        # initialize the executor and test availability +        # as they are linked, use a "TurbomoleExecutor" here +        self._initialize_backend(executor=TurbomoleExecutor) +        self._check_backend_availability() + +    def _prepare_tmp_input_directories( +        self, enumeration: Enumeration +    ) -> Tuple[List[str], List[str], List[str], List[str]]: +        tmp_dirs = [] +        paths_input_cosmofile = [] +        paths_config_cosmotherm = [] +        paths_output_cosmotherm = [] +        for conformer in enumeration: +            # 1) generate all temporary paths +            tmp_dir = tempfile.mkdtemp() +            path_input_cosmofile = os.path.join(tmp_dir, _EE.TM_OUTPUT_COSMOFILE) +            path_config_cosmofile = os.path.join(tmp_dir, _EE.CT_COSMOTHERM_CONFIG_FILE) +            path_output_cosmotherm = os.path.join( +                tmp_dir, _EE.CT_COSMOTHERM_OUTPUT_FILE +            ) + +            # 2) write-out the COSMO file +            # Note, that the generation of the COSMO files is part of the Turbomole execution. The reason is, that +            # the generation is complicated and uses a lot of input from the TM step, thus "cosmoprep" is +            # executed there. +            if _CTE.EXTRA_DATA_COSMOFILE not in conformer.get_extra_data().keys(): +                self._logger.log( +                    f"In order to write out COSMO files, the content needs to be annotated as extra data in the conformers.
Have you executed Turbomole before?", + _LE.ERROR, + ) + raise ValueError("Could not find COSMO data to write out - abort.") + with open(path_input_cosmofile, "w") as f: + f.writelines(conformer.get_extra_data()[_CTE.EXTRA_DATA_COSMOFILE]) + + # 3) add paths + tmp_dirs.append(tmp_dir) + paths_input_cosmofile.append(path_input_cosmofile) + paths_config_cosmotherm.append(path_config_cosmofile) + paths_output_cosmotherm.append(path_output_cosmotherm) + + return ( + tmp_dirs, + paths_input_cosmofile, + paths_config_cosmotherm, + paths_output_cosmotherm, + ) + + def _execute_run(self, config_path: str): + result = self._backend_executor.execute( + command=_EE.CT_COSMOTHERM, arguments=[config_path], check=True + ) + if _EE.CT_COSMOTHERM_FAIL_STRING in result.stderr: + self._logger.log( + f"Execution of {_EE.CT_COSMOTHERM} failed. Error message:", _LE.ERROR + ) + log_multiline_string( + logger=self._logger_blank, + level=_LE.ERROR, + multi_line_string=result.stdout, + ) + + def _write_config_file(self, config_path: str): + # by default use the internal configuration, but if one has been specified, use this one + # note, that the default name of the COSMO file is "mol.cosmo", so this should be used in any config file + if _EE.CT_CONFIG not in self.settings.arguments.parameters.keys(): + with open(attach_root_path(_EE.CT_CONFIG_DEFAULTPATH), "r") as f: + config = f.readlines() + self._logger.log( + f"Loaded {_EE.CT_COSMOTHERM} configuration from default file {_EE.CT_CONFIG_DEFAULTPATH}.", + _LE.DEBUG, + ) + else: + config = self.settings.arguments.parameters[_EE.CT_CONFIG] + with open(config_path, "w") as f: + f.writelines([line.rstrip("\n") + "\n" for line in config]) + + def _get_line_by_pattern(self, lines: List[str], pattern: str) -> str: + for line in lines: + if pattern in line: + return line + + def _get_values_from_line(self, line: str) -> List[str]: + try: + value_part = line.split(":")[1] + return value_part.split() + except Exception: + return [] + + def _annotate_from_output_block( + self, conformer: Conformer, block: List[str], annotation: dict + ): + for key in annotation.keys(): + # get the line with the values + line = self._get_line_by_pattern( + lines=block, pattern=annotation[key][_COE.PATTERN] + ) + if line is None: + continue + + # get the values and select the one that is to be added + try: + values = self._get_values_from_line(line=line) + value = values[annotation[key][_COE.ELEMENT]] + except IndexError: + continue + + # add it as a tag to the conformer; we can replace part of the tag name with e.g. 
the solvent + # names if we need to + conformer.get_molecule().SetProp(key, value) + + def _get_solvents_from_header(self, header: List[str]): + line_solvents = self._get_line_by_pattern( + header, pattern=_COE.SOLVENT_BLOCK_HEADER_COMPOUNDS_PATTERN + ) + return self._get_values_from_line(line_solvents) + + def _get_current_solvent_from_header(self, header: List[str]): + line_mol_fraction = self._get_line_by_pattern( + header, pattern=_COE.SOLVENT_BLOCK_HEADER_MOLFRACTION_PATTERN + ) + solvent_index = self._get_values_from_line(line_mol_fraction).index( + _COE.SOLVENT_BLOCK_CURRENT_FRACTION_VALUE + ) + return self._get_solvents_from_header(header)[solvent_index] + + def _parse_general_block(self, lines: List[str], conformer: Conformer): + general_block = [] + for index in range(len(lines)): + if _COE.GENERAL_BLOCK_PATTERN_STRING in lines[index]: + # skip the first lines after the header + index += 2 + while not lines[index] == "": + general_block.append(lines[index]) + index += 1 + break + self._annotate_from_output_block( + conformer=conformer, + block=general_block, + annotation=_COE.GENERAL_BLOCK_ANNOTATIONS, + ) + + def _load_solvent_blocks(self, lines: List[str]) -> List[dict]: + solvent_blocks = [] + index = 0 + while index < len(lines): + if _COE.SOLVENT_BLOCK_PATTERN_STRING in lines[index]: + # we need to extract both the header (which solvent?) and the body (actual values) + new_block = {"header": [], "body": []} + # go back to start of block + while ( + index >= 0 and _COE.SOLVENT_BLOCK_START_PATTERN not in lines[index] + ): + index -= 1 + + # extract the header + while _COE.SOLVENT_BLOCK_BODY_START_PATTERN not in lines[index]: + new_block["header"].append(lines[index]) + index += 1 + + # extract the body + while index < len(lines) and not ( + lines[index] == "" and lines[index + 1] == "" + ): + new_block["body"].append(lines[index]) + index += 1 + solvent_blocks.append(new_block) + index += 1 + return solvent_blocks + + def _annotate_solvent_blocks( + self, solvent_blocks: List[dict], conformer: Conformer + ): + for block_dict in solvent_blocks: + # get solvent and translate according to internal solvent abbreviation table + try: + current_solvent = self._get_current_solvent_from_header( + block_dict["header"] + ) + if current_solvent in _COE.SOLVENT_TRANSLATE_SOLVENT.keys(): + current_solvent = _COE.SOLVENT_TRANSLATE_SOLVENT[current_solvent] + except ValueError: + continue + + # overwrite the solvent name placeholder in and annotate + template_annotations = deepcopy(_COE.SOLVENT_BLOCK_BODY_ANNOTATIONS) + annotations = {} + for key in template_annotations.keys(): + new_key = key.replace(_COE.SOLVENT_REPLACEHOLDER, current_solvent) + annotations[new_key] = template_annotations[key] + + # annotate + self._annotate_from_output_block( + conformer=conformer, block=block_dict["body"], annotation=annotations + ) + + def _parse_output(self, path_output: str, conformer: Conformer): + # there are two sets of blocks we need to parse: the "general" block, that is always present and, if specified, + # free energies from solvents ("mixtures") + # 1) load the file + with open(path_output, "r") as f: + lines = f.readlines() + lines = [line.rstrip("\n") for line in lines] + + # 2) extract the general block: from the match of the pattern line until the second empty line occurs + # e.g. 
"--- Compound 1 (mol) ---\n\nAtomic weights : 111111\n ...\n\n" + self._parse_general_block(lines, conformer) + + # 3) extract the solvent blocks (if available) + # search for the first occurrence of a Gibb's free energy and expand top until the pattern line is found and + # to bottom until more than one empty line is hit; proceed until all blocks are processed + solvent_blocks = self._load_solvent_blocks(lines) + + if len(solvent_blocks) > 0: + self._annotate_solvent_blocks(solvent_blocks, conformer) + + def execute(self): + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if len(enumeration.get_conformers()) == 0: + continue + + # generate copies of the conformers, as to not accidentally manipulate them + inp_enum = deepcopy(enumeration) + + # prepare the temporary files and retrieve paths (TM config is charge-state dependent!) + ( + tmp_dirs, + paths_input_cosmofile, + paths_config_cosmotherm, + paths_output_cosmotherm, + ) = self._prepare_tmp_input_directories(enumeration=inp_enum) + + # execute individual conformers + for ( + tmp_dir, + path_config_cosmotherm, + conformer, + path_output_cosmotherm, + ) in zip( + tmp_dirs, + paths_config_cosmotherm, + enumeration.get_conformers(), + paths_output_cosmotherm, + ): + self._move_to_dir(tmp_dir) + + # set a necessary environment variable to avoid clashes + os.environ[_EE.TM_TURBOTMPDIR] = tmp_dir + + # write configuration file + self._write_config_file(config_path=path_config_cosmotherm) + + # all ready; start the execution + self._execute_run(config_path=path_config_cosmotherm) + + # parse the results + self._parse_output( + path_output=path_output_cosmotherm, conformer=conformer + ) + + # restore working directory and remove temporary files + self._restore_working_dir() + for tmp_dir in tmp_dirs: + if os.path.isdir(tmp_dir): + self._remove_temporary(tmp_dir) + + self._logger.log( + f"Executed COSMO for {len(enumeration.get_conformers())} conformers for enumeration {enumeration.get_index_string()}.", + _LE.INFO, + ) diff --git a/icolos/core/workflow_steps/calculation/electrostatics/__init__.py b/icolos/core/workflow_steps/calculation/electrostatics/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/calculation/electrostatics/cresset_ec.py b/icolos/core/workflow_steps/calculation/electrostatics/cresset_ec.py new file mode 100644 index 0000000..2955351 --- /dev/null +++ b/icolos/core/workflow_steps/calculation/electrostatics/cresset_ec.py @@ -0,0 +1,108 @@ +from copy import deepcopy +from typing import List, Tuple +from icolos.core.workflow_steps.step import StepBase +from pydantic import BaseModel +import tempfile +from icolos.utils.enums.step_enums import StepCressetEnum +from icolos.utils.execute_external.cresset_executor import CressetExecutor +from icolos.utils.general.files_paths import gen_tmp_file +from icolos.core.workflow_steps.step import _LE +import os +from icolos.utils.general.parallelization import Parallelizer, SubtaskContainer + + +_SCE = StepCressetEnum() + + +class StepCressetEC(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=CressetExecutor) + self._check_backend_availability() + + def _prepare_tmp_input(self, batch: List) -> Tuple[List, List]: + conformers = [] + tmp_dirs = [] + protein = self.data.generic.get_argument_by_extension( + "pdb", rtn_file_object=True + ) + for sublist in batch: + for task in sublist: + conformer = task.data + conformers.append(conformer) + 
+ # generate the tmpdir + tmp_dir = tempfile.mkdtemp() + tmp_dirs.append(tmp_dir) + _, path_input_sdf = gen_tmp_file( + prefix="tmp_", suffix=".sdf", dir=tmp_dir + ) + conformer.write(path=path_input_sdf) + + # write the protein to that tmpdir + protein.write(path=os.path.join(tmp_dir, "protein.pdb"), join=False) + + return conformers, tmp_dirs + + def _execute_cresset_ec_parallel(self): + parallelizer = Parallelizer(func=self._run_conformer) + n = 1 + + while self._subtask_container.done() is False: + + next_batch = self._get_sublists( + get_first_n_lists=self._get_number_cores() + ) # return n lists of length max_sublist_length + _ = [sub.increment_tries() for element in next_batch for sub in element] + _ = [sub.set_status_failed() for element in next_batch for sub in element] + + conformers, tmp_dirs = self._prepare_tmp_input(next_batch) + self._logger.log( + f"Executing Cresset EC for batch {n} containing {len(conformers)} conformers", + _LE.DEBUG, + ) + + parallelizer.execute_parallel(tmp_dir=tmp_dirs, conformer=conformers) + + results = self._parse_results(tmp_dirs, conformers) + + for sublist, result in zip(next_batch, results): + # TODO: this only works if max length sublist == 1, fine for now as that is all turbomole can handle + for task in sublist: + if result == _SCE.SUCCESS: + task.set_status_success() + else: + task.set_status_failed() + self._remove_temporary(tmp_dirs) + n += 1 + + def _parse_results(self, tmp_dirs: List, conformers: List): + # walk over the directory structure, parse the output file, identify the conformer, attach a tag to the mol object + # TODO: No idea what the output looks like for this, write the parser!! + pass + + def execute(self): + # unroll all conformers + all_conformers = [] + for compound in self.get_compounds(): + for enum in compound.get_enumerations(): + if self._input_object_empty(enum): + continue + else: + for conformer in enum.get_conformers(): + conf = deepcopy(conformer) + all_conformers.append(conf) + + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(all_conformers) + self._execute_cresset_ec_parallel() + + def _run_conformer(self): + # run a single conformer through Flare's EC + self._backend_executor.execute() + + # execution is + # module load Flare && pyflare electrostaticcomplementarity.py -p protein.pdb ligands.sdf diff --git a/icolos/core/workflow_steps/calculation/electrostatics/esp_sim.py b/icolos/core/workflow_steps/calculation/electrostatics/esp_sim.py new file mode 100644 index 0000000..a3b0c95 --- /dev/null +++ b/icolos/core/workflow_steps/calculation/electrostatics/esp_sim.py @@ -0,0 +1,152 @@ +from copy import deepcopy +import tempfile +from typing import List +from icolos.core.containers.compound import Conformer, Enumeration +from icolos.core.workflow_steps.step import StepBase +from pydantic import BaseModel + +try: + from espsim import EmbedAlignConstrainedScore +except ImportError: + print( + "WARNING - Could not import module espsim, check it is installed in your environment" + ) + +from rdkit.Chem import AllChem, Mol +from rdkit import Chem +from rdkit.Chem import rdFMCS +from icolos.utils.general.parallelization import SubtaskContainer, Parallelizer +import os + +# Based on https://github.com/hesther/espsim + + +class StepEspSim(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + def _compute_esp_sim(self, ref: Mol, trg: Enumeration, tmp_dir: str): + """ + :param ref : Reference molecule, the 
known binder against which to calculate similarity +        :param trg: Icolos enumeration of the target molecule, as a SMILES string. Embedded with RDKit +        """ +        # create mol object from the target's SMILES string +        # housekeeping for data appending later + +        trg_mol = Chem.AddHs(Chem.MolFromSmiles(trg.get_smile())) + +        # get the mol object for the max common substructure +        mcs = Chem.MolFromSmarts(rdFMCS.FindMCS([ref, trg_mol]).smartsString) +        mcs = Chem.MolToSmiles(mcs) + +        patt = Chem.MolFromSmiles(mcs, sanitize=False) +        helper = Chem.AddHs(Chem.MolFromSmiles(mcs)) + +        # Embed the MCS helper molecule, create one conformer +        AllChem.EmbedMolecule(helper, AllChem.ETKDG()) + +        # Optimize the coordinates of the conformer +        AllChem.UFFOptimizeMolecule(helper) +        core = AllChem.DeleteSubstructs( +            AllChem.ReplaceSidechains(helper, patt), Chem.MolFromSmiles("*") +        )  # Create core molecule with 3D coordinates +        core.UpdatePropertyCache() + +        args = [ref, trg_mol, core] + +        args = self._get_arguments(args) + +        simShape, simEsp = EmbedAlignConstrainedScore(*args) + +        # now attach the target molecule as a conformer and attach the scores to the mol object +        trg_conf = Conformer(conformer=trg_mol) +        trg_conf.get_molecule().SetProp("shape_sim", str(simShape[0])) +        trg_conf.get_molecule().SetProp("esp_sim", str(simEsp[0])) + +        trg_conf.write(os.path.join(tmp_dir, "conformer.sdf")) + +    def _get_arguments(self, std_args: List) -> List: + +        for flag in self.settings.arguments.flags: +            std_args.append(flag) +        for key, value in self.settings.arguments.parameters.items(): +            std_args.append(key) +            std_args.append(value) +        return std_args + +    def _prepare_batch(self, batch): +        target_enums = [] +        tmp_dirs = [] + +        for sublist in batch: +            for task in sublist: +                target_enums.append(task.data) +                tmp_dirs.append(tempfile.mkdtemp()) +        return target_enums, tmp_dirs + +    def _parse_output(self, trgs: List[Enumeration], tmp_dirs: List[str]) -> None: +        for tmp_dir, trg in zip(tmp_dirs, trgs): +            # grab the written sdf object +            sdf_path = os.path.join(tmp_dir, "conformer.sdf") +            mol_supplier = Chem.SDMolSupplier(sdf_path, removeHs=False) +            for mol in mol_supplier:  # should only be one conformer!
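+                # added note: _compute_esp_sim writes a single embedded conformer per
+                # tmp_dir, so exactly one molecule is expected here; it is re-attached
+                # to its enumeration by compound name and enumeration id below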
+                conf = Conformer(conformer=mol) +                comp = self.get_compound_by_name(trg.get_compound_name()) +                comp.find_enumeration(trg.get_enumeration_id()).add_conformer(conf) + +        self._remove_temporary(tmp_dirs) + +    def _execute_espsim_parallel(self): +        # embed the reference compound +        ref_compound = Chem.AddHs( +            Chem.MolFromSmiles(self.settings.additional["ref_smiles"]) +        ) + +        parallelizer = Parallelizer(func=self._compute_esp_sim) + +        while self._subtask_container.done() is False: +            next_batch = self._get_sublists(get_first_n_lists=self._get_number_cores()) + +            _ = [sub.increment_tries() for element in next_batch for sub in element] +            _ = [sub.set_status_failed() for element in next_batch for sub in element] + +            trgs, tmp_dirs = self._prepare_batch(next_batch) + +            refs = [ref_compound for _ in range(len(next_batch))] + +            parallelizer.execute_parallel(ref=refs, trg=trgs, tmp_dir=tmp_dirs) +            # hand over the embedded reference (computed once) and target compound (SMILES string to be embedded) +            self._parse_output(tmp_dirs=tmp_dirs, trgs=trgs) + +            for task in next_batch: +                for subtask in task: +                    # TODO: Check return codes +                    subtask.set_status_success() + +    def execute(self): +        """ +        esp-sim does molecular alignment with RDKit, then computes a Coulombic overlap integral and a +        Tanimoto similarity for shape measurement + +        The use case takes a reference compound (known binder) and compares it to REINVENT compounds + +        Usage: +        * Define the reference compound using settings.additional, as a SMILES string, to be embedded by RDKit +        * The remaining compounds are embedded using a preceding RDKit embedding +        * Attach the resulting scores to the enumeration +        """ + +        all_enums = [] +        for compound in self.get_compounds(): +            for enumeration in compound: +                all_enums.append(deepcopy(enumeration)) + +        self.execution.parallelization.max_length_sublists = 1 +        # unroll the provided compounds +        self._subtask_container = SubtaskContainer(max_tries=3) +        self._subtask_container.load_data(all_enums) +        self._execute_espsim_parallel() diff --git a/icolos/core/workflow_steps/calculation/feature_counter.py b/icolos/core/workflow_steps/calculation/feature_counter.py new file mode 100644 index 0000000..0423d00 --- /dev/null +++ b/icolos/core/workflow_steps/calculation/feature_counter.py @@ -0,0 +1,64 @@ +from rdkit.Chem import Mol +from rdkit.Chem.rdMolDescriptors import CalcNumRings, CalcNumAromaticRings +from pydantic import BaseModel + +from icolos.utils.enums.program_parameters import FeatureCounterEnum +from icolos.utils.enums.step_enums import StepFeatureCounterEnum +from icolos.core.workflow_steps.step import _LE +from icolos.core.workflow_steps.calculation.base import StepCalculationBase + +_FC = FeatureCounterEnum() +_SFC = StepFeatureCounterEnum() + + +class StepFeatureCounter(StepCalculationBase, BaseModel): +    def __init__(self, **data): +        super().__init__(**data) + +        # extend parameters with defaults +        if _SFC.LEVEL not in self.settings.additional.keys(): +            self.settings.additional[_SFC.LEVEL] = _SFC.LEVEL_CONFORMER +            self._logger.log( +                f'No operational level for feature counting specified, defaulting to "{_SFC.LEVEL_CONFORMER}".', +                _LE.INFO, +            ) + +    def _count_rings(self, mol: Mol): +        number_rings = CalcNumRings(mol) +        mol.SetProp(_FC.PROPERTY_NUM_RINGS, str(number_rings)) + +    def _count_aromatic_rings(self, mol: Mol): +        number_rings = CalcNumAromaticRings(mol) +        mol.SetProp(_FC.PROPERTY_NUM_AROMATIC_RINGS, str(number_rings)) + +    def _get_feature_method(self, feature: str): +        if feature == _FC.PROPERTY_NUM_RINGS: +            return
self._count_rings + elif feature == _FC.PROPERTY_NUM_AROMATIC_RINGS: + return self._count_aromatic_rings + else: + raise ValueError(f'Feature "{feature}" not yet supported.') + + def execute(self): + feature = self.settings.additional[_SFC.FEATURE].lower() + feature_method = self._get_feature_method(feature=feature) + level = self.settings.additional[_SFC.LEVEL] + mol_count = 0 + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if level == _SFC.LEVEL_ENUMERATION: + mol = enumeration.get_molecule() + if mol is not None: + feature_method(mol) + mol_count = mol_count + 1 + elif level == _SFC.LEVEL_CONFORMER: + for conformer in enumeration.get_conformers(): + mol = conformer.get_molecule() + if mol is not None: + feature_method(mol) + mol_count = mol_count + 1 + else: + raise ValueError(f'Level "{level}" not supported.') + self._logger.log( + f'Counted feature "{feature}" for {mol_count} molecules.', _LE.INFO + ) diff --git a/icolos/core/workflow_steps/calculation/panther.py b/icolos/core/workflow_steps/calculation/panther.py new file mode 100644 index 0000000..c3d8752 --- /dev/null +++ b/icolos/core/workflow_steps/calculation/panther.py @@ -0,0 +1,152 @@ +from icolos.core.containers.generic import GenericData +import os +import tempfile +import re +import numpy as np +from copy import deepcopy +from typing import List + +from icolos.core.workflow_steps.calculation.base import StepCalculationBase +from icolos.utils.enums.program_parameters import PantherEnum +from icolos.utils.enums.step_enums import StepPantherEnum +from icolos.utils.execute_external.execute import Executor +from icolos.core.workflow_steps.step import _LE +from pydantic import BaseModel +from icolos.utils.general.files_paths import attach_root_path + +_SPE = ( + StepPantherEnum() +) # hold the constants to access the relevant value from initialised **data +_PE = PantherEnum() # hold the program settings + + +class StepPanther(StepCalculationBase, BaseModel): + + negative_images: List = [] + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=Executor) + + def _prepare_tmp_input_dir(self): + tmp_dir = tempfile.mkdtemp() + return tmp_dir + + def _write_panther_config_file(self, tmp_dir): + if not self.settings.additional[_SPE.PANTHER_CONFIG_FILE]: + self._logger.log("No config file specified, using default.", _LE.INFO) + panther_config = attach_root_path( + "/icolos/config/panther/default_panther.in" + ) + + elif not os.path.isfile(self.settings.additional[_SPE.PANTHER_CONFIG_FILE]): + self._logger.log( + f"File not found for the provided panther config file path: {self.settings.additional[_SPE.PANTHER_CONFIG_FILE]}", + _LE.ERROR, + ) + raise FileNotFoundError( + f"The specified panther config file was not found {self.settings.additional[_SPE.PANTHER_CONFIG_FILE]}" + ) + + else: + panther_config = self.settings.additional[_SPE.PANTHER_CONFIG_FILE] + + with open(panther_config, "r") as f: + panther_config = f.read() + + # add the parameter absolute paths to the angle, etc. 
file specifications + update_dictionary = deepcopy(self.settings.additional[_SPE.FIELDS]) + update_dictionary = self._add_ligand_centroid_coordinates(update_dictionary) + update_dictionary = self._add_parameter_locations_to_replacement_fields( + update_dictionary + ) + # update the configuration and write it to a file + panther_config = self._modify_panther_config_file( + panther_config, update_dictionary + ) + + with open(os.path.join(tmp_dir, "panther_config.in"), "w") as f: + f.write(panther_config) + + def _add_parameter_locations_to_replacement_fields( + self, update_dictionary: dict + ) -> dict: + # in case not specified (which is the main use case), use the default libraries for charges etc. that should + # reside in the same folder as the python entry-point "panther.py"; setting absolute paths here, allows to + # execute PANTHER for any input in any given folder + for key, value in _SPE.FIELDS_PARAMETERS_LIB.items(): + if key not in update_dictionary.keys(): + update_dictionary[key] = os.path.join( + self.settings.additional[_SPE.PANTHER_LOCATION], value + ) + return update_dictionary + + def _add_ligand_centroid_coordinates(self, update_dict: dict) -> dict: + coordinates = self._calculate_ligand_centroid( + self.settings.additional[_SPE.FIELDS][_SPE.FIELD_KEY_PDB_FILE] + ) + update_dict[_SPE.FIELD_KEY_COORDINATES] = coordinates + return update_dict + + def _calculate_ligand_centroid(self, file): + with open(file, "r") as f: + file_lines = f.readlines() + file_lines = [ + line for line in file_lines if "X 0" in line and len(line.split()) > 5 + ] + + if file_lines == []: + self._logger.log( + "No lines corresponding to the ligand found! Centroid will not be correct", + _LE.WARNING, + ) + a = np.genfromtxt(file_lines, usecols=[6, 7, 8], skip_header=1) + avg = list(a.mean(axis=0)) + avg = [str(i) for i in avg] + return " ".join(avg) + + def _modify_panther_config_file( + self, config_file: str, update_dictionary: dict + ) -> str: + for key, value in update_dictionary.items(): + pattern = fr"({key}.*:: ).*" + pattern = re.compile(pattern) + config_file = re.sub(pattern, fr"\1 {value}", config_file) + return config_file + + def _execute_backend(self, tmp_dir): + arguments = [ + os.path.join( + self.settings.additional[_SPE.PANTHER_LOCATION], _PE.PANTHER_ENTRYPOINT + ), + os.path.join(tmp_dir, _PE.PANTHER_CONFIG), + os.path.join(tmp_dir, _PE.PANTHER_OUTPUT_FILE), + ] + self._backend_executor.execute( + command=_PE.PANTHER_PTYHON2, arguments=arguments, check=True + ) + + def _parse_panther_output(self, tmp_dir): + try: + with open(os.path.join(tmp_dir, _PE.PANTHER_OUTPUT_FILE), "r") as f: + data = f.read() + self.data.generic.add_file( + GenericData(file_name=_PE.PANTHER_OUTPUT_FILE, file_data=data) + ) + except FileNotFoundError: + self._logger.log( + f"No panther output file was produced for step {self.step_id}, subsequent steps that depend on the negative image will fail.", + _LE.WARNING, + ) + + def execute(self): + tmp_dir = self._prepare_tmp_input_dir() + self._write_panther_config_file(tmp_dir) + self._execute_backend(tmp_dir) + self._logger.log("Executed PANTHER and obtained negative image.", _LE.INFO) + self._logger.log( + f"Calculated negative image for configuration file in {tmp_dir}.", _LE.DEBUG + ) + self._parse_panther_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/calculation/rms_filter.py b/icolos/core/workflow_steps/calculation/rms_filter.py new file mode 100644 index 0000000..a150f19 --- /dev/null +++ 
b/icolos/core/workflow_steps/calculation/rms_filter.py @@ -0,0 +1,97 @@ +import pandas as pd +from typing import List +from pydantic import BaseModel + +from icolos.core.containers.compound import Conformer + +from icolos.utils.enums.step_enums import StepRMSFilterEnum +from icolos.core.workflow_steps.step import _LE +from icolos.core.workflow_steps.calculation.base import StepCalculationBase + +_SRF = StepRMSFilterEnum() + + +class StepRMSFilter(StepCalculationBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + # extend parameters + if _SRF.THRESHOLD not in self.settings.additional.keys(): + self.settings.additional[_SRF.THRESHOLD] = 1 + if _SRF.METHOD not in self.settings.additional.keys(): + self.settings.additional[_SRF.METHOD] = _SRF.METHOD_ALIGNMOL + if _SRF.ORDER_BY not in self.settings.additional.keys(): + self.settings.additional[_SRF.ORDER_BY] = None + else: + if _SRF.ORDER_ASCENDING not in self.settings.additional.keys(): + self._logger.log( + 'Setting order ascending not specified, setting to "True" (default).', + _LE.WARNING, + ) + self.settings.additional[_SRF.ORDER_ASCENDING] = False + + def _get_representative_indices( + self, df_rms: pd.DataFrame, prop_values: List[float] + ) -> List[int]: + keep_indices = [] + prop_idx = list(zip(prop_values, list(range(len(prop_values))))) + threshold = self.settings.additional[_SRF.THRESHOLD] + while len(prop_idx) > 0: + # get the best (according to the property) element's index, add it to the list and remove it from + # the remaining ones + if self.settings.additional[_SRF.ORDER_BY] is not None: + prop_idx = [ + (prop, idx) + for prop, idx in sorted( + prop_idx, reverse=self.settings.additional[_SRF.ORDER_ASCENDING] + ) + ] + cur_best_idx = prop_idx[0][1] + keep_indices.append(cur_best_idx) + del prop_idx[0] + + # remove all, that are fulfilling the RMS threshold + for i in reversed(range(len(prop_idx))): + comp_idx = prop_idx[i][1] + cur_rms = df_rms.iloc[cur_best_idx, comp_idx] + if cur_rms <= threshold: + del prop_idx[i] + return keep_indices + + def _filter_conformers(self, conformers: List[Conformer]) -> List[Conformer]: + # to select the "best" conformers, here the property to use for ordering / ranking is specified + order_by = self.settings.additional[_SRF.ORDER_BY] + if order_by is not None: + prop_values = self._get_property_values(conformers, order_by) + else: + prop_values = [None for _ in range(len(conformers))] + + # generate RMS matrix (NxN, where N is the number of conformers) + df_rms = self._calculate_rms_matrix(conformers, self._get_rms_method()) + + keep_indices = self._get_representative_indices(df_rms, prop_values) + + return [conformers[i] for i in range(len(conformers)) if i in keep_indices] + + def execute(self): + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if self._input_object_empty(enumeration): + continue + + number_conformers_before = len(enumeration) + + # filter conformers on the enumeration level + filtered_conformers = self._filter_conformers( + conformers=enumeration.get_conformers() + ) + + # add filtered conformers to enumeration + enumeration.clear_conformers() + for conf in filtered_conformers: + enumeration.add_conformer(conformer=conf, auto_update=True) + number_conformers_after = len(enumeration) + self._logger.log( + f"Filtered {number_conformers_before} conformers down to {number_conformers_after} for enumeration {enumeration.get_index_string()}.", + _LE.INFO, + ) diff --git 
a/icolos/core/workflow_steps/calculation/rmsd.py b/icolos/core/workflow_steps/calculation/rmsd.py new file mode 100644 index 0000000..48fa565 --- /dev/null +++ b/icolos/core/workflow_steps/calculation/rmsd.py @@ -0,0 +1,47 @@ +from typing import List +from pydantic import BaseModel + +from icolos.core.containers.compound import Conformer, unroll_conformers +from icolos.utils.enums.step_enums import StepRMSDEnum, StepDataManipulationEnum +from icolos.core.workflow_steps.step import _LE +from icolos.core.workflow_steps.calculation.base import StepCalculationBase + +_SR = StepRMSDEnum() +_SDM = StepDataManipulationEnum() + + +class StepRMSD(StepCalculationBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + # extend parameters + if _SR.METHOD not in self.settings.additional.keys(): + self.settings.additional[_SR.METHOD] = _SR.METHOD_ALIGNMOL + + def _calculate_RMSD(self, conformers: List[Conformer]): + for conf in conformers: + rmsd_matrix = self._calculate_rms_matrix( + conformers=[conf] + conf.get_extra_data()[_SDM.KEY_MATCHED], + rms_method=self._get_rms_method(), + ) + + # use the specified tag name if it is the first value and append an index in case there are more + for idx, col in enumerate(rmsd_matrix.columns[1:]): + combined_tag = "".join([_SR.RMSD_TAG, "" if idx == 0 else str(idx)]) + rmsd_value = rmsd_matrix.iloc[[0]][col][0] + conf.get_molecule().SetProp(combined_tag, str(rmsd_value)) + conf.get_extra_data()[_SDM.KEY_MATCHED][idx].get_molecule().SetProp( + combined_tag, str(rmsd_value) + ) + + def execute(self): + # this assumes that the conformers that are to be matched for the calculation of the RMSD matrix, are attached + # as a list in a generic data field with a specified key + conformers = unroll_conformers(compounds=self.get_compounds()) + self._calculate_RMSD(conformers=conformers) + self._logger.log( + f"Annotated {len(conformers)} conformers with RMSD values (tag: {_SR.RMSD_TAG}).", + _LE.INFO, + ) + + # TODO: add a nice pandas DF with the RMSD values to a generic data field diff --git a/icolos/core/workflow_steps/calculation/shaep.py b/icolos/core/workflow_steps/calculation/shaep.py new file mode 100644 index 0000000..267e4bb --- /dev/null +++ b/icolos/core/workflow_steps/calculation/shaep.py @@ -0,0 +1,77 @@ +from icolos.utils.execute_external.execute import Executor +from icolos.core.workflow_steps.calculation.base import StepCalculationBase +from icolos.core.workflow_steps.step import _LE +from icolos.utils.enums.step_enums import StepShaepEnum +from icolos.utils.enums.program_parameters import PantherEnum, ShaepEnum +from icolos.core.containers.compound import Conformer +import tempfile +from pydantic import BaseModel +import os + +_SSE = StepShaepEnum() +_SE = ShaepEnum() +_PE = PantherEnum() + + +class StepShaep(StepCalculationBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=Executor) + + def _prepare_tmp_input_dir(self): + tmp_dir = tempfile.mkdtemp() + return tmp_dir + + def _execute_backend(self, conf_path: str, tmp_dir: str, ni_path: str): + arguments = [ + os.path.join(tmp_dir, ni_path), + conf_path, + os.path.join(tmp_dir, _SE.OUTPUT_SIMILARITY), + ] + self._backend_executor.execute( + command=_SE.SHAEP_EXECUTABLE, arguments=arguments, check=True + ) + + def _parse_output(self, tmp_dir: str, conformer: Conformer): + with open(os.path.join(tmp_dir, _SE.OUTPUT_SIMILARITY), "r") as f: + # TODO: add support for multiple input structures; ignore the names (all will be in one 
+            # TODO: add support for multiple input structures; ignore the names (all will be in one
+            # line), but from position 8 (index 7 in python) onwards, the shape and esp similarities
+            # are reported in the same order as the input, i.e. <7 other values> mol1_shape mol1_esp mol2_shape ...
+            parts = f.readlines()[1].split("\t")
+            conformer.get_molecule().SetProp(_SE.TAG_SHAPE_SIMILARITY, str(parts[7]))
+            conformer.get_molecule().SetProp(_SE.TAG_ESP_SIMILARITY, str(parts[8]))
+
+    def execute(self):
+        number_rescored = 0
+        for compound in self.get_compounds():
+            for enumeration in compound.get_enumerations():
+                if len(enumeration.get_conformers()) == 0:
+                    self._logger.log(
+                        f"Found no conformers for enumeration {enumeration} for compound {compound}.",
+                        _LE.WARNING,
+                    )
+                    # we can still execute ShaEP at the enumeration level; if the compounds are
+                    # correctly annotated they should be written out correctly, although this will
+                    # be slower
+                    # easiest for now is to add the enumeration mol object as a single conformer
+                    # and run that through ShaEP
+                    mol = enumeration.get_molecule()
+                    conf = Conformer(conformer=mol)
+                    enumeration.add_conformer(conf)
+
+                # TODO: ShaEP allows batch execution for any number of compounds (parsing gets more
+                # difficult though). Implement that to avoid overhead from file system issues.
+                # TODO: Refactor and add comments
+                for conformer in enumeration.get_conformers():
+                    tmp_dir = self._prepare_tmp_input_dir()
+                    conf_path = os.path.join(tmp_dir, _SE.CONFORMER_PATH)
+                    ni_file = self.data.generic.get_files_by_extension("mol2")[0]
+                    ni_file.write(tmp_dir)
+                    conformer.write(conf_path)
+                    self._execute_backend(conf_path, tmp_dir, ni_file.get_file_name())
+                    self._parse_output(tmp_dir, conformer)
+                    self._logger.log(
+                        f"Finished ShaEP execution for conformer {conformer.get_index_string()}.",
+                        _LE.DEBUG,
+                    )
+                    number_rescored += 1
+                    self._remove_temporary(tmp_dir)
+        self._logger.log(f"Executed ShaEP for {number_rescored} conformers.", _LE.INFO)
diff --git a/icolos/core/workflow_steps/calculation/turbomole.py b/icolos/core/workflow_steps/calculation/turbomole.py
new file mode 100644
index 0000000..0ff9d7e
--- /dev/null
+++ b/icolos/core/workflow_steps/calculation/turbomole.py
@@ -0,0 +1,440 @@
+import os
+import tempfile
+from typing import Tuple, List
+from copy import deepcopy
+
+from pydantic import BaseModel
+
+from icolos.utils.enums.step_enums import StepTurbomoleEnum
+from icolos.utils.execute_external.execute import execution_successful
+from icolos.utils.execute_external.openbabel import OpenBabelExecutor
+from icolos.utils.execute_external.turbomole import TurbomoleExecutor
+from icolos.utils.general.convenience_functions import nested_get
+
+from icolos.utils.general.molecules import get_charge_for_molecule
+
+from icolos.core.containers.compound import Conformer, Enumeration
+
+from icolos.utils.enums.program_parameters import OpenBabelEnum
+from icolos.utils.enums.program_parameters import TurbomoleEnum
+from icolos.utils.enums.compound_enums import ConformerContainerEnum
+from icolos.core.workflow_steps.calculation.base import StepCalculationBase
+from icolos.core.workflow_steps.step import _LE
+from icolos.loggers.logger_utils import log_multiline_string
+from icolos.utils.general.files_paths import _FG, check_file_availability, gen_tmp_file
+
+from icolos.utils.general.parallelization import Parallelizer, SubtaskContainer
+
+_OE = OpenBabelEnum()
+_EE = TurbomoleEnum()
+_COE = ConformerContainerEnum()
+_STE = StepTurbomoleEnum()
+
+
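+# Rough shape of the conversion pipeline implemented by this step (a sketch for
+# orientation only; file names are illustrative and the exact flags live in the enums):
+#   obabel -isdf tmp.sdf -oxyz -Otmp.xyz       # SDF -> XYZ
+#   x2t tmp.xyz > coord                        # XYZ -> Turbomole "coord" file
+#   define < config.tm && cosmoprep < cosmo.config
+#   ridft                                      # or the configured execution mode
+#   t2x -c > final.xyz                         # final snapshot back to XYZ
+#   obabel -ixyz final.xyz -osdf -Ofinal.sdf   # XYZ -> SDF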
+class StepTurbomole(StepCalculationBase, BaseModel):
+    """Step that executes Turbomole.
+
+    Note that the execution (especially in conjunction with a subsequent cosmo step) is relatively complex:
+    (1) Write the conformer as an SDF file to a temporary directory,
+    (2) use obabel to translate it to an XYZ file,
+    (3) use x2t to make a coord file out of it (input for turbomole; is updated during geometry optimization),
+    (4) execute Turbomole, generating (i) a final coord file and (ii) a trajectory (if specified),
+    (5) use t2x to extract the final "snapshot" as an XYZ file and translate it to SDF, and
+    (6) update the coordinates and tags in the conformers.
+
+    IMPORTANT: Keep the "mol.cosmo" file attached to the conformer as additional data for a possible cosmo step."""
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        # initialize the executor and test availability
+        self._initialize_backend(executor=TurbomoleExecutor)
+        # TODO: figure out why "module load turbomole/73 && ridft" sometimes fails (see also below) and
+        # use strict=True after fix; probably, it has to do with $TURBOTMPDIR (all parallel jobs access the same)
+        self._check_backend_availability(strict=False)
+
+    def get_original_conformer(self, conformer) -> Conformer:
+        for compound in self.get_compounds():
+            for enum in compound.get_enumerations():
+                if (
+                    enum._enumeration_id
+                    == conformer.get_enumeration_object().get_enumeration_id()
+                ):
+                    for conf in enum.get_conformers():
+                        if conf._conformer_id == conformer._conformer_id:
+                            return conf
+
+    def _prepare_tmp_input_directories(
+        self, batch: List
+    ) -> Tuple[List, List[str], List[str], List[str], List[str], List[str], List[str]]:
+        conformers = []
+        tmp_dirs = []
+        paths_input_sdf = []
+        paths_input_xyz = []
+        paths_coord = []
+        paths_tm_config = []
+        paths_cosmo_config = []
+        for sublist in batch:
+            for element in sublist:  # there is only one
+                conformer = element.data
+                conformers.append(conformer)
+                # 1) generate all temporary paths
+                tmp_dir = tempfile.mkdtemp()
+                _, path_input_sdf = gen_tmp_file(
+                    prefix="tmp_", suffix=".sdf", dir=tmp_dir
+                )
+                _, path_input_xyz = gen_tmp_file(
+                    prefix="tmp_", suffix=".xyz", dir=tmp_dir
+                )
+                path_coord = os.path.join(tmp_dir, _EE.COORD)
+
+                # 2) write-out the conformers for an enumeration in an SDF file
+                conformer.write(path=path_input_sdf)
+
+                # 3) translate the SDF into an XYZ file (using OpenBabel)
+                # Note that all tags are lost here (but the names are not!)
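+                # (An XYZ file holds only an atom count, a title line and element/coordinate
+                # rows; the snippet below is purely illustrative:
+                #     3
+                #     tmp_abc123
+                #     O  0.000  0.000  0.117
+                #     H  0.000  0.755 -0.471
+                #     H  0.000 -0.755 -0.471
+                # so SD tags cannot survive this conversion, only the title line does.)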
+ obabel_executor = OpenBabelExecutor() + obabel_executor.execute( + command=_OE.OBABEL, + arguments=[ + _OE.OBABEL_INPUTFORMAT_SDF, + path_input_sdf, + _OE.OBABEL_OUTPUTFORMAT_XYZ, + "".join([_OE.OBABEL_O, path_input_xyz]), + ], + check=True, + location=tmp_dir, + ) + + # 4) translate the XYZ to a TM input file ("coord"); "x2t" writes to stdout + result = self._backend_executor.execute( + command=_EE.TM_X2T, arguments=[path_input_xyz], check=True + ) + with open(path_coord, "w") as file: + file.write(result.stdout) + + # 5) add paths + tmp_dirs.append(tmp_dir) + paths_input_sdf.append(path_input_sdf) + paths_input_xyz.append(path_input_xyz) + paths_coord.append(path_coord) + + tm_path, cosmo_path = self._get_config_paths(conformer) + paths_tm_config.append(tm_path) + paths_cosmo_config.append(cosmo_path) + + return ( + conformers, + tmp_dirs, + paths_input_sdf, + paths_input_xyz, + paths_coord, + paths_tm_config, + paths_cosmo_config, + ) + + def _get_config_paths(self, conformer: Conformer) -> Tuple[str, str]: + try: + config_dir = self.settings.additional[_EE.TM_CONFIG_DIR] + config_basename = self.settings.additional[_EE.TM_CONFIG_BASENAME] + path_cosmo_config = self.settings.additional[_EE.TM_CONFIG_COSMO] + except KeyError as e: + raise KeyError("The dir, basename and cosmo paths need to be set.") from e + + charge = str( + get_charge_for_molecule( + molecule=conformer._enumeration_object.get_molecule() + ) + ) + + # the path would look like: /opt/Icolos/turbomole_config/b97-3c-ri-d3-def2-mtzvp-int-nosym-charge-1.tm + path_tm_config = os.path.join( + config_dir, "".join([config_basename, charge, _EE.TM_CONFIG_ENDING]) + ) + return path_tm_config, path_cosmo_config + + def _execute_define(self, tmp_dir, path_tm_config: str): + result = self._backend_executor.execute( + command=_EE.TM_DEFINE, + arguments=[" ".join(["<", path_tm_config])], + check=True, + location=tmp_dir, + ) + + if not execution_successful(result.stderr, _EE.TM_DEFINE_SUCCESS_STRING): + self._logger.log( + f"Execution of {_EE.TM_DEFINE} failed for file {path_tm_config}. Error message:", + _LE.ERROR, + ) + log_multiline_string( + logger=self._logger_blank, + level=_LE.ERROR, + multi_line_string=result.stdout, + ) + + def _execute_cosmoprep(self, tmp_dir, path_cosmo_config: str): + result = self._backend_executor.execute( + command=_EE.TM_COSMOPREP, + arguments=[" ".join(["<", path_cosmo_config])], + check=True, + location=tmp_dir, + ) + + if not execution_successful(result.stderr, _EE.TM_COSMOPREP_SUCCESS_STRING): + self._logger.log( + f"Execution of {_EE.TM_COSMOPREP} failed for file {path_cosmo_config}. 
Error message:", + _LE.ERROR, + ) + log_multiline_string( + logger=self._logger_blank, + level=_LE.ERROR, + multi_line_string=result.stdout, + ) + + def _manipulate_control_script(self, path: str): + # do the following changes to the "control" script in order to generate FINE Cosmo files + with open(path, "r") as f: + control = f.readlines() + new_control = [] + for line in control: + if line.rstrip("\n") != _EE.CONTROL_COSMO_OUT: + new_control.append(line) + else: + new_control.append("".join([_EE.CONTROL_COSMO_REPLACE, "\n"])) + + # only add this line in case there is no optimization run going on + if ( + nested_get( + self.settings.additional, + [_STE.EXECUTION_MODE], + default=_EE.TM_RIDFT, + ) + == _EE.TM_RIDFT + ): + new_control.append("".join([_EE.CONTROL_COSMO_INSERTION, "\n"])) + with open(path, "w") as f: + f.writelines(new_control) + + def _get_arguments(self) -> list: + arguments = [] + + # add flags + for flag in self.settings.arguments.flags: + arguments.append(flag) + + # flatten the dictionary into a list for command-line execution + for key in self.settings.arguments.parameters.keys(): + arguments.append(key) + arguments.append(self.settings.arguments.parameters[key]) + return arguments + + def _execute_run(self, tmp_dir): + execution_mode = nested_get( + self.settings.additional, [_STE.EXECUTION_MODE], default=_EE.TM_RIDFT + ) + result = self._backend_executor.execute( + command=execution_mode, + arguments=self._get_arguments(), + check=True, + location=tmp_dir, + ) + + if ( + not execution_successful(result.stderr, _EE.TM_RIDFT_SUCCESS_STRING) + or result.returncode != 0 + ): + self._logger.log( + f"Execution of {execution_mode} failed (return code: {result.returncode}). Error message (stdout & stderr):", + _LE.DEBUG, + ) + log_multiline_string( + logger=self._logger_blank, + level=_LE.DEBUG, + multi_line_string=result.stdout, + ) + log_multiline_string( + logger=self._logger_blank, + level=_LE.DEBUG, + multi_line_string=result.stderr, + ) + return result.returncode + + def _coord2sdf(self, tmp_dir, path_output_xyz: str, path_output_sdf: str): + # extract the latest snapshot and write it as an XYZ file + result = self._backend_executor.execute( + command=_EE.TM_T2X, arguments=[_EE.TM_T2X_C], check=True, location=tmp_dir + ) + + with open(path_output_xyz, "w") as file: + file.write(result.stdout) + + # translate it to an SDF + obabel_executor = OpenBabelExecutor() + obabel_executor.execute( + command=_OE.OBABEL, + arguments=[ + _OE.OBABEL_INPUTFORMAT_XYZ, + path_output_xyz, + _OE.OBABEL_OUTPUT_FORMAT_SDF, + "".join([_OE.OBABEL_O, path_output_sdf]), + ], + check=True, + ) + + def _parse_output(self, tmp_dirs: List[str], conformers: List[Conformer]): + results = [] + # load and attach "mol.cosmo" file + for tmp_dir, conformer in zip(tmp_dirs, conformers): + result = _STE.SUCCESS + cosmo_path = os.path.join(tmp_dir, _EE.TM_OUTPUT_COSMOFILE) + if check_file_availability(path=cosmo_path) != _FG.NOT_GENERATED: + with open(cosmo_path, "r") as f: + file_content = f.readlines() + conf = self.get_original_conformer(conformer) + conf.add_extra_data(key=_COE.EXTRA_DATA_COSMOFILE, data=file_content) + # conformer.add_extra_data(key=_COE.EXTRA_DATA_COSMOFILE, data=file_content) + + else: + self._logger.log( + f"Could not load cosmo file for {conformer.get_index_string()}, will remove conformer.", + _LE.WARNING, + ) + self._logger.log( + f"File {cosmo_path} could not be loaded for {conformer.get_index_string()}.", + _LE.DEBUG, + ) + result = _STE.FAILED + + # set molecule to None 
removes the 3D coordinates -> will be deleted in the end + conformer.set_molecule(None) + + # load and attach "coord" file + coord_file = os.path.join(tmp_dir, _EE.TM_OUTPUT_COORDFILE) + coord_file_status = check_file_availability(path=coord_file) + if coord_file_status == _FG.NOT_GENERATED: + self._logger.log( + f"File {coord_file} could not be loaded for {conformer.get_index_string()}.", + _LE.DEBUG, + ) + result = _STE.FAILED + elif coord_file_status == _FG.GENERATED_EMPTY: + self._logger.log( + f"File {coord_file} is empty for {conformer.get_index_string()}.", + _LE.DEBUG, + ) + result = _STE.FAILED + elif coord_file_status == _FG.GENERATED_SUCCESS: + with open(coord_file, "r") as f: + file_content = f.readlines() + conf = self.get_original_conformer(conformer) + conf.add_extra_data( + key=_COE.EXTRA_DATA_COORDFILE, data=file_content + ) + + execution_mode = nested_get( + self.settings.additional, + [_STE.EXECUTION_MODE], + default=_EE.TM_RIDFT, + ) + + # for RIDFT, only the cosmo file is required as coordinates are not updated (no geometry optimization) + if execution_mode != _EE.TM_RIDFT: + path_output_xyz = os.path.join(tmp_dir, _EE.TM_OUTPUT_FINAL_XYZ) + path_output_sdf = os.path.join(tmp_dir, _EE.TM_OUTPUT_FINAL_SDF) + self._coord2sdf(tmp_dir, path_output_xyz, path_output_sdf) + conf = self.get_original_conformer(conformer) + conf.update_coordinates(path=path_output_sdf) + results.append(result) + return results + + def _clean_failed_conformers(self, enumeration: Enumeration) -> Tuple[int, int]: + n_conformers_before = len(enumeration.get_conformers()) + enumeration.clean_failed_conformers() + n_conformers_after = len(enumeration.get_conformers()) + return n_conformers_before, n_conformers_after + + def _run_conformer( + self, + conformer: Conformer, + tmp_dir: str, + path_tm_config: str, + path_cosmo_config: str, + ) -> None: + self._execute_define(tmp_dir=tmp_dir, path_tm_config=path_tm_config) + # execute COSMOprep (update "control") + self._execute_cosmoprep(tmp_dir=tmp_dir, path_cosmo_config=path_cosmo_config) + # set a necessary environment variable + os.environ[_EE.TM_TURBOTMPDIR] = tmp_dir + # update the "control" file + self._manipulate_control_script(path=os.path.join(tmp_dir, _EE.CONTROL)) + # all ready; start the execution + self._execute_run(tmp_dir) + + self._logger.log( + f"Finished Turbomole execution for conformer {conformer.get_index_string()} in directory {tmp_dir}.", + _LE.DEBUG, + ) + + def _execute_turbomole_parallel(self): + parallelizer = Parallelizer(func=self._run_conformer) + n = 1 + + while self._subtask_container.done() is False: + + next_batch = self._get_sublists( + get_first_n_lists=self._get_number_cores() + ) # return n lists of length max_sublist_length + _ = [sub.increment_tries() for element in next_batch for sub in element] + _ = [sub.set_status_failed() for element in next_batch for sub in element] + + ( + conformers, + tmp_dirs, + paths_input_sdf, + paths_input_xyz, + paths_coord, + paths_tm_configs, + paths_cosmo_configs, + ) = self._prepare_tmp_input_directories(next_batch) + + self._logger.log( + f"Executing Turbomole for batch {n} containing {len(tmp_dirs)} conformers", + _LE.INFO, + ) + + parallelizer.execute_parallel( + conformer=conformers, + tmp_dir=tmp_dirs, + path_tm_config=paths_tm_configs, + path_cosmo_config=paths_cosmo_configs, + ) + + results = self._parse_output(tmp_dirs, conformers) + + for sublist, result in zip(next_batch, results): + # TODO: this only works if max length sublist == 1, fine for now as that is all turbomole 
can handle + for task in sublist: + if result == _STE.SUCCESS: + task.set_status_success() + else: + task.set_status_failed() + self._remove_temporary(tmp_dirs) + n += 1 + + def execute(self): + all_conformers = [] + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if self._input_object_empty(enumeration): + continue + for conformer in enumeration.get_conformers(): + # for efficient parallelisation, unroll all conformers + conf = deepcopy(conformer) + all_conformers.append(conf) + + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(all_conformers) + self._execute_turbomole_parallel() diff --git a/icolos/core/workflow_steps/cavity_explorer/__init__.py b/icolos/core/workflow_steps/cavity_explorer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/cavity_explorer/base.py b/icolos/core/workflow_steps/cavity_explorer/base.py new file mode 100644 index 0000000..0ed37f8 --- /dev/null +++ b/icolos/core/workflow_steps/cavity_explorer/base.py @@ -0,0 +1,69 @@ +from pydantic import BaseModel +from icolos.core.workflow_steps.step import StepBase +from typing import List +from icolos.utils.enums.step_enums import StepCavExploreEnum + +_SFP = StepCavExploreEnum() + + +class StepCavityExplorerBase(StepBase, BaseModel): + eps: float = None + iso_value: int = None + threshold: float = None + min_samples: int = None + format_: str = None + + def __init__(self, **data): + super().__init__(**data) + + def _write_input_files(self, tmp_dir): + # HM: this is the simplest implementation - we can think about whether we need any more complexity + for file in self.data.generic.get_flattened_files(): + file.write(tmp_dir) + + def _parse_arguments(self, flag_dict: dict, args: list = None) -> List: + arguments = args if args is not None else [] + # first add the settings from the command line + for key in self.settings.arguments.parameters.keys(): + arguments.append(key) + arguments.append(str(self.settings.arguments.parameters[key])) + for flag in self.settings.arguments.flags: + arguments.append(str(flag)) + for key, value in flag_dict.items(): + # only add defaults if they have not been specified in the json + if key not in arguments: + arguments.append(key) + arguments.append(value) + return arguments + + def _set_mdpocket_args(self): + if self.settings.additional is not None: + keys = self.settings.additional.keys() + + self.eps = self.settings.additional[_SFP.EPS] if _SFP.EPS in keys else 3 + self.iso_value = ( + self.settings.additional[_SFP.ISO_VALUE] + if _SFP.ISO_VALUE in keys + else 0.5 + ) + self.threshold = ( + self.settings.additional[_SFP.THRESHOLD] + if _SFP.THRESHOLD in keys + else 20.0 + ) + self.min_samples = ( + self.settings.additional[_SFP.MIN_SAMPLES] + if _SFP.MIN_SAMPLES in keys + else 25 + ) + if _SFP.TRAJ_TYPE in keys: + if self.settings.additional[_SFP.TRAJ_TYPE].lower() == "gromacs": + self.format_ = "xtc" + elif self.settings.additional[_SFP.TRAJ_TYPE].lower() == "desmond": + self.format_ = "dtr" + else: + raise ValueError( + "Only Desmond and GROMACS trajectory types are supported" + ) + else: + raise ValueError("Trajectory format was not specified!") diff --git a/icolos/core/workflow_steps/cavity_explorer/mdpocket.py b/icolos/core/workflow_steps/cavity_explorer/mdpocket.py new file mode 100644 index 0000000..0d4eada --- /dev/null +++ 
b/icolos/core/workflow_steps/cavity_explorer/mdpocket.py
@@ -0,0 +1,306 @@
+from icolos.utils.general.parallelization import Parallelizer, SubtaskContainer
+from pydantic import BaseModel
+from icolos.core.workflow_steps.cavity_explorer.base import StepCavityExplorerBase
+from icolos.utils.enums.step_enums import StepCavExploreEnum
+from icolos.utils.execute_external.execute import Executor
+from icolos.core.workflow_steps.step import _LE
+from sklearn.cluster import DBSCAN
+from collections import Counter
+import numpy as np
+import re
+import os
+
+_SFP = StepCavExploreEnum()
+
+
+class StepMDpocket(StepCavityExplorerBase, BaseModel):
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        # self._initialize_backend(executor=MPIExecutor)
+        self._initialize_backend(executor=Executor)
+
+        # set max_length_sublists to 1
+        self.execution.parallelization.max_length_sublists = 1
+
+    def _create_density_grid_file(self, tmp_dir: str, iso_value: float):
+        """Creates a density grid from the .dx file into a .pdb file; heavily influenced by the
+        extractISOPdb.py script provided by fpocket."""
+        density_file = [
+            file for file in os.listdir(tmp_dir) if file.endswith("dens_grid.dx")
+        ]
+        assert len(density_file) == 1
+        density_file = density_file[0]
+
+        outfile = os.path.join(tmp_dir, f"iso{iso_value}.pdb")
+
+        with open(os.path.join(tmp_dir, density_file), "r") as f:
+            # read the free-form header lines until the first "object" line - here is an example:
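+            # (illustrative OpenDX layout, the grid dimensions are arbitrary here:
+            #   object 1 class gridpositions counts 80 90 100
+            #   origin 10.000 12.500 -3.000
+            #   delta 0.500 0.000 0.000
+            #   delta 0.000 0.500 0.000
+            #   delta 0.000 0.000 0.500
+            #   object 2 class gridconnections counts 80 90 100
+            #   object 3 class array type double rank 0 items 720000 data follows
+            # the parsing below picks the counts, origin, deltas and item count from
+            # exactly these lines)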
+            header = ""
+            tmp = f.readline()
+            while tmp[0] != "o":
+                header = header + tmp
+                tmp = f.readline()
+
+            # read the grid size
+            r = re.compile(r"\w+")
+            gsize = r.findall(tmp)
+            gsize = [int(gsize[-3]), int(gsize[-2]), int(gsize[-1])]
+
+            # read the origin of the system
+            line = f.readline().split()
+            origin = [float(line[-3]), float(line[-2]), float(line[-1])]
+
+            # read grid space
+            line = f.readline().split()
+            deltax = [float(line[-3]), float(line[-2]), float(line[-1])]
+            line = f.readline().split()
+            deltay = [float(line[-3]), float(line[-2]), float(line[-1])]
+            line = f.readline().split()
+            deltaz = [float(line[-3]), float(line[-2]), float(line[-1])]
+
+            # pay attention here, this assumes an orthogonal, normalized space, but normally it should be ok
+            delta = np.array([deltax[0], deltay[1], deltaz[2]])
+
+            # read the number of data points
+            f.readline()
+            r = re.compile(r"\d+")
+            n_entries = int(r.findall(f.readline())[2])
+
+            if n_entries != gsize[0] * gsize[1] * gsize[2]:
+                raise AssertionError(
+                    "Error reading the file. The number of expected data points does not correspond to the number of "
+                    "labeled data points in the header."
+                )
+            # initiate the x/y/z counters for reading the grid data
+            z = 0
+            y = 0
+            x = 0
+
+            self._logger.log("Reading grid file...", _LE.DEBUG)
+
+            with open(outfile, "w") as f_out:
+                counter = 1
+                for _ in range(n_entries // 3):
+                    c = f.readline().split()
+                    if len(c) != 3:
+                        self._logger.log("Error reading grid data", _LE.ERROR)
+                        raise AssertionError
+                    for i in range(3):
+                        if (0 > iso_value > float(c[i])) or (
+                            0 < iso_value < float(c[i])
+                        ):
+                            # f_out.write(f"ATOM {counter} C PTH 1 {origin[0] + float(x) * delta[0]} {origin[1] + float(y) * delta[1]} {origin[2] + float(z) * delta[2]} 0.00 0.00\n")
+                            f_out.write(
+                                "ATOM  %5d  C   PTH     1    %8.3f%8.3f%8.3f%6.2f%6.2f\n"
+                                % (
+                                    counter,
+                                    origin[0] + float(x) * delta[0],
+                                    origin[1] + float(y) * delta[1],
+                                    origin[2] + float(z) * delta[2],
+                                    0.0,
+                                    0.0,
+                                )
+                            )
+                            counter += 1
+                        z += 1
+                        if z >= gsize[2]:
+                            z = 0
+                            y += 1
+                            if y >= gsize[1]:
+                                y = 0
+                                x += 1
+
+        self._logger.log(f"Finished writing {outfile}", _LE.DEBUG)
+
+    def _cluster_pockets(self, tmp_dir, eps, min_samples, threshold, iso_value):
+        """
+        Clusters points from the initial MDpocket density grid at a given iso value.
+        """
+        iso_file = os.path.join(tmp_dir, f"iso{iso_value}.pdb")
+        with open(iso_file, "r") as f:
+            # collect the data from the pdb file (x, y, z coordinates, keyed by atom serial)
+            data = {
+                (line[5:11].strip()): (
+                    line[30:38].strip(),
+                    line[38:46].strip(),
+                    line[46:54].strip(),
+                )
+                for line in f.readlines()
+            }
+        db = DBSCAN(eps=eps, min_samples=min_samples).fit(
+            np.array(list(data.values())).astype(np.float64)
+        )
+
+        labels = db.labels_
+        data_ = np.array(list(data.values())).astype(np.float64)
+        db.fit_predict(data_)
+
+        self._logger.log(
+            f"Number of clusters found for eps = {eps}, iso = {iso_value}, min_samples = {min_samples} and threshold = {threshold} is: {len(set(db.labels_))}",
+            _LE.DEBUG,
+        )
+
+        pockets_report = Counter(db.labels_)
+        filtered_pockets = []
+        filtered_data = {}
+        filtered_labels = []
+
+        # keep only the pockets with more than `threshold` points (label -1 is DBSCAN noise)
+        for k, v in pockets_report.items():
+            if v > self.threshold and k >= 0:
+                filtered_pockets.append(k)
+
+        # get the keys and labels for each data point
+        res = list(zip(list(data.keys()), labels))
+
+        # get lists with the data and labels for the filtered pockets
+        for pocket in filtered_pockets:
+            for (index, label) in res:
+                if label == pocket:
+                    filtered_data[index] = data.get(index)
+                    filtered_labels.append(label)
+
+        self._logger.log(
+            f"PocketIDs having more than {self.threshold} points are: {filtered_pockets}",
+            _LE.DEBUG,
+        )
+        self._logger.log(
+            f"The number of filtered pockets is: {len(filtered_pockets)}", _LE.DEBUG
+        )
+        return data, labels, filtered_data, filtered_labels, pockets_report
+
+    def _save_pocket_files(self, tmp_dir, data, labels):
+        """Saves the individual pockets as separate pdb files to be used with mdpocket."""
+        iso_file = os.path.join(tmp_dir, f"iso{self.iso_value}.pdb")
+
+        # define labels and indices
+        res = list(zip(list(data.keys()), labels))
+        with open(iso_file, "r") as f:
+            original_lines = f.readlines()
+        # collect the labels of all pockets, excluding outliers (label -1)
+        indices = list(set([l for l in labels if l >= 0]))
+
+        # save the pocket pdbs - these are passed with the --selected_pocket arg later
+        for label in indices:
+            with open(os.path.join(tmp_dir, f"pocket_{label}.pdb"), "w") as f:
+                for (index, lab) in res:
+                    if lab == label:
+                        f.write(original_lines[int(index) - 1])
+
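+    # The per-pocket rerun below amounts to a call roughly like (paths illustrative,
+    # assuming a GROMACS trajectory; the real command string comes from _SFP.MDPOCKET_COMMAND):
+    #   mdpocket --trajectory_file traj.xtc --trajectory_format xtc \
+    #            --selected_pocket pocket_0.pdb -f structure.pdb -o pocket_0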
+    def _run_mdpocket_selected_pocket(self, tmp_dir):
+        """Runs the second mdpocket pass (fpocket 3) on each selected pocket."""
+        pocket_files = [
+            file
+            for file in os.listdir(tmp_dir)
+            if file.endswith(".pdb") and "pocket_" in file
+        ]
+        argument_dicts = []
+        for file in pocket_files:
+            arguments = self._parse_arguments(
+                flag_dict={
+                    "--trajectory_file": os.path.join(
+                        tmp_dir,
+                        self.data.generic.get_argument_by_extension(self.format_),
+                    ),
+                    "--trajectory_format": self.format_,
+                    "--selected_pocket": os.path.join(tmp_dir, file),
+                    "-f": self.data.generic.get_argument_by_extension("pdb"),
+                    "-o": file.split(".")[0],
+                }
+            )
+            argument_dicts.append(arguments)
+
+        fpocket_parallelizer = Parallelizer(func=self._execute_mdpocket)
+        self._subtask_container = SubtaskContainer(
+            max_tries=self.execution.failure_policy.n_tries
+        )
+        self._subtask_container.load_data(argument_dicts)
+
+        while self._subtask_container.done() is False:
+            next_batch = self._get_sublists(get_first_n_lists=self._get_number_cores())
+
+            batch_dirs, batch_args = self._prepare_batch_inputs(next_batch, tmp_dir)
+
+            fpocket_parallelizer.execute_parallel(
+                tmp_dir=batch_dirs, arguments=batch_args
+            )
+            _ = [sub.increment_tries() for element in next_batch for sub in element]
+            _ = [sub.set_status_failed() for element in next_batch for sub in element]
+
+    def _prepare_batch_inputs(self, batch, tmp_dir):
+        tmp_dirs = []
+        args = []
+        for next_subtask_list in batch:
+            tmp_dirs.append(tmp_dir)
+            for (
+                subtask
+            ) in (
+                next_subtask_list
+            ):  # enforced as only one task per subtask, otherwise it makes no sense
+                args.append(subtask.data)  # append the arguments list
+        return tmp_dirs, args
+
+    def _execute_mdpocket(self, tmp_dir, arguments):
+
+        self._backend_executor.execute(
+            command=_SFP.MDPOCKET_COMMAND,
+            arguments=arguments,
+            location=tmp_dir,
+            check=True,
+        )
+
+    def execute(self):
+
+        tmp_dir = self._make_tmpdir()
+        self._write_input_files(tmp_dir)
+        # set some constants from the arguments
+        self._set_mdpocket_args()
+
+        # execute the initial mdpocket job (without a specific pocket) to produce the .dx file
+        mdpocket_run1_args = self._parse_arguments(
+            flag_dict={
+                "--trajectory_file": os.path.join(
+                    tmp_dir, self.data.generic.get_argument_by_extension(self.format_)
+                ),
+                "--trajectory_format": self.format_,
+                "-f": os.path.join(
+                    tmp_dir, self.data.generic.get_argument_by_extension("pdb")
+                ),
+            }
+        )
+
+        # run the first command, producing the dx file and a bunch of pocket_n.pdb pocket topology files
+        self._execute_mdpocket(tmp_dir, mdpocket_run1_args)
+
+        # take the produced dx file and create the density grid in pdb format
+        self._create_density_grid_file(tmp_dir, iso_value=self.iso_value)
+
+        # cluster the grid points (only data and labels are needed downstream)
+        data, labels, _, _, _ = self._cluster_pockets(
+            tmp_dir=tmp_dir,
+            eps=self.eps,
+            min_samples=self.min_samples,
+            threshold=self.threshold,
+            iso_value=self.iso_value,
+        )
+
+        # produces a load of pocket_n.pdb files based on the clusters identified by DBSCAN
+        self._save_pocket_files(tmp_dir, data, labels)
+        # run mdpocket a second time with a specified pocket to produce a pocket parameter file;
+        # this is done for each individual pocket, in parallel
+        # check whether the descriptors flag has been set
+        # if _SFP.DESCRIPTORS in self.settings.additional.keys() and self.settings.additional[_SFP.DESCRIPTORS]:
+        self._run_mdpocket_selected_pocket(tmp_dir)
+
+        # save what's in the tmpdir, then remove it
+        self._parse_output(tmp_dir)
+        self._logger.log(
+            f"Completed execution for {self.step_id} 
successfully", _LE.INFO + ) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/confgen/__init__.py b/icolos/core/workflow_steps/confgen/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/confgen/base.py b/icolos/core/workflow_steps/confgen/base.py new file mode 100644 index 0000000..0e440be --- /dev/null +++ b/icolos/core/workflow_steps/confgen/base.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel +from icolos.core.workflow_steps.step import StepBase + + +class StepConfgenBase(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) diff --git a/icolos/core/workflow_steps/confgen/crest.py b/icolos/core/workflow_steps/confgen/crest.py new file mode 100644 index 0000000..ba00946 --- /dev/null +++ b/icolos/core/workflow_steps/confgen/crest.py @@ -0,0 +1,121 @@ +import os +from typing import List + +from pydantic import BaseModel +from rdkit import Chem +from copy import deepcopy + +from icolos.utils.execute_external.crest import CrestExecutor + +from icolos.utils.general.molecules import get_charge_for_molecule + +from icolos.core.containers.compound import Enumeration, Conformer + +from icolos.utils.enums.program_parameters import CrestEnum, CrestOutputEnum +from icolos.core.workflow_steps.step import _LE, _CTE +from icolos.core.workflow_steps.confgen.base import StepConfgenBase + +_EE = CrestEnum() +_COE = CrestOutputEnum() + + +class StepCREST(StepConfgenBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + # initialize the executor and test availability + self._initialize_backend(executor=CrestExecutor) + self._check_backend_availability() + + def _get_energies_from_XYZ(self, path) -> list: + energies = [] + with open(path, "r") as f: + lines = f.readlines() + for line in lines: + if line.startswith(_COE.PREFIX_ENERGIES_XYZ): + energies.append(line.lstrip().rstrip()) + return energies + + def _parse_CREST_result( + self, dir_path: str, enumeration: Enumeration + ) -> List[Conformer]: + """Function to parse the result from CREST.""" + # CREST will output a variety of files to "dir_path" + conformers_sdf = os.path.join(dir_path, _COE.CREST_CONFORMERS_SDF) + conformers_xyz = os.path.join(dir_path, _COE.CREST_CONFORMERS_XYZ) + + # as the energies are lost in the SDF output, we will add them as a tag + energies = self._get_energies_from_XYZ(conformers_xyz) + charge = str( + get_charge_for_molecule(enumeration.get_molecule(), add_as_tag=False) + ) + mol_supplier = Chem.SDMolSupplier(conformers_sdf, removeHs=False) + result = [] + for mol_id, mol in enumerate(mol_supplier): + mol.SetProp(_CTE.CONFORMER_ENERGY_TAG, energies[mol_id]) + mol.SetProp(_CTE.FORMAL_CHARGE_TAG, charge) + result.append(Conformer(conformer=mol)) + return result + + def _set_formal_charge(self, parameters: dict, molecule: Chem.Mol) -> dict: + charge = get_charge_for_molecule(molecule, add_as_tag=False) + parameters[_EE.CREST_CHRG] = charge + self._logger.log(f"Set charge for molecule to {charge}.", _LE.DEBUG) + return parameters + + def _set_number_cores(self, parameters: dict) -> dict: + """Function for parallelization of task, setting the number of cores to be used.""" + parameters[_EE.CREST_T] = int(self.execution.parallelization.cores) + return parameters + + def _prepare_settings(self, tmp_dir: str, enumeration: Enumeration) -> list: + # first position is the input (SDF) file; the internal input at this stage is a molecule + # -> write it to a temporary SDF file (undocumented input functionality) and add 
the path
+        settings = [self._prepare_temp_input(tmp_dir, enumeration.get_molecule())]
+
+        # add flags
+        for flag in self.settings.arguments.flags:
+            settings.append(flag)
+
+        # add parameters
+        parameters = deepcopy(self.settings.arguments.parameters)
+
+        # update / over-write fields that need a specific value or are defined elsewhere
+        parameters = self._set_number_cores(parameters)
+        parameters = self._set_formal_charge(parameters, enumeration.get_molecule())
+
+        # flatten the dictionary into a list for command-line execution
+        for key in parameters.keys():
+            settings.append(key)
+            settings.append(parameters[key])
+        return settings
+
+    def execute(self):
+        for compound in self.get_compounds():
+            for enumeration in compound.get_enumerations():
+                if not self._input_object_valid(enumeration):
+                    continue
+
+                # set up
+                tmp_dir = self._move_to_temp_dir()
+
+                # the call to CREST starts with the path to the input file, followed by arguments and flags
+                settings = self._prepare_settings(tmp_dir, enumeration=enumeration)
+
+                self._logger.log(
+                    f"Executing CREST backend in folder {tmp_dir}.", _LE.DEBUG
+                )
+                result = self._backend_executor.execute(
+                    command=_EE.CREST, arguments=settings, check=False
+                )
+                self._restore_working_dir()
+
+                conformers = self._parse_CREST_result(tmp_dir, enumeration=enumeration)
+                enumeration.clear_conformers()
+                enumeration.add_conformers(conformers=conformers, auto_update=True)
+                self._logger.log(
+                    f"Executed CREST and obtained {len(conformers)} conformers for enumeration {enumeration.get_index_string()}",
+                    _LE.INFO,
+                )
+
+                self._remove_temporary(tmp_dir)
diff --git a/icolos/core/workflow_steps/confgen/omega.py b/icolos/core/workflow_steps/confgen/omega.py
new file mode 100644
index 0000000..b91646e
--- /dev/null
+++ b/icolos/core/workflow_steps/confgen/omega.py
@@ -0,0 +1,111 @@
+import os
+from typing import List
+
+from pydantic import BaseModel
+from rdkit import Chem
+from copy import deepcopy
+from icolos.utils.execute_external.omega import OMEGAExecutor
+from icolos.core.workflow_steps.step import _LE, _CTE
+from icolos.utils.general.molecules import get_charge_for_molecule
+
+from icolos.core.containers.compound import Enumeration, Conformer
+
+from icolos.utils.enums.program_parameters import OMEGAEnum, OMEGAOutputEnum
+from icolos.core.workflow_steps.confgen.base import StepConfgenBase
+
+_EE = OMEGAEnum()
+_COE = OMEGAOutputEnum()
+
+
+class StepOmega(StepConfgenBase, BaseModel):
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        # initialize the executor and test availability
+        self._initialize_backend(executor=OMEGAExecutor)
+        self._check_backend_availability()
+
+    def _parse_OMEGA_result(
+        self, dir_path: str, enumeration: Enumeration
+    ) -> List[Conformer]:
+        # OMEGA will output a variety of files to "dir_path"
+        conformers_sdf = os.path.join(dir_path, _COE.OUTPUT_SDF_NAME)
+
+        # energies are added as a tag in the output
+        mol_supplier = Chem.SDMolSupplier(conformers_sdf, removeHs=False)
+        charge = str(
+            get_charge_for_molecule(enumeration.get_molecule(), add_as_tag=False)
+        )
+        result = []
+        for mol_id, mol in enumerate(mol_supplier):
+            mol.SetProp(
+                _CTE.CONFORMER_ENERGY_TAG, mol.GetProp(_COE.CLASSIC_ENERGY_OUTPUT_TAG)
+            )
+            mol.ClearProp(_COE.CLASSIC_ENERGY_OUTPUT_TAG)
+            mol.SetProp(_CTE.FORMAL_CHARGE_TAG, charge)
+            conf = Conformer(conformer=mol)
+            result.append(conf)
+        return result
+
+    def _set_input_output_paths(self, parameters: dict, input_path: str) -> dict:
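+        # e.g. even if the user supplied their own input/output parameters in the JSON
+        # config, they are replaced here with the freshly written temporary SDF and the
+        # fixed output name (the actual flag names live in OMEGAEnum; this is only an
+        # illustration of the behaviour)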
+        # this is handled this way to overwrite any specifications from the user for the
+        # input / output paths as well
+        parameters[_EE.CLASSIC_INPUT] = input_path
+        parameters[_EE.CLASSIC_OUTPUT] = _COE.OUTPUT_SDF_NAME
+        return parameters
+
+    def _prepare_settings(self, tmp_dir: str, enumeration: Enumeration) -> list:
+        # the first argument is the mode of the binary "oeomega" (for now defaults to "classic")
+        settings = [_EE.OMEGA_MODE_CLASSIC]
+
+        # add flags
+        # make sure the energy tag is set as well
+        for flag in self.settings.arguments.flags:
+            settings.append(flag)
+        if _EE.CLASSIC_SDENERGY not in settings:
+            settings.append(_EE.CLASSIC_SDENERGY)
+
+        # add parameters
+        parameters = deepcopy(self.settings.arguments.parameters)
+
+        # update / over-write fields that need a specific value or are defined elsewhere
+        parameters = self._set_input_output_paths(
+            parameters=parameters,
+            input_path=self._prepare_temp_input(tmp_dir, enumeration.get_molecule()),
+        )
+
+        # flatten the dictionary into a list for command-line execution
+        for key in parameters.keys():
+            settings.append(key)
+            settings.append(parameters[key])
+        return settings
+
+    def execute(self):
+        for compound in self.get_compounds():
+            for enumeration in compound.get_enumerations():
+                if not self._input_object_valid(enumeration):
+                    continue
+
+                # set up
+                tmp_dir = self._move_to_temp_dir()
+                settings = self._prepare_settings(tmp_dir, enumeration=enumeration)
+
+                # execution
+                self._logger.log(
+                    f"Executing OMEGA backend in folder {tmp_dir}.", _LE.DEBUG
+                )
+                result = self._backend_executor.execute(
+                    command=_EE.OMEGA, arguments=settings, check=False
+                )
+                self._restore_working_dir()
+
+                # parsing
+                conformers = self._parse_OMEGA_result(tmp_dir, enumeration=enumeration)
+                enumeration.clear_conformers()
+                enumeration.add_conformers(conformers=conformers, auto_update=True)
+                self._logger.log(
+                    f"Completed OMEGA for enumeration {enumeration.get_index_string()}, added {len(conformers)} conformers.",
+                    _LE.INFO,
+                )
+
+                # clean-up
+                self._remove_temporary(tmp_dir)
diff --git a/icolos/core/workflow_steps/confgen/xtb.py b/icolos/core/workflow_steps/confgen/xtb.py
new file mode 100644
index 0000000..34a18a8
--- /dev/null
+++ b/icolos/core/workflow_steps/confgen/xtb.py
@@ -0,0 +1,170 @@
+import os
+from tempfile import mkdtemp
+
+from pydantic import BaseModel
+from rdkit import Chem
+from copy import deepcopy
+from typing import List, Tuple
+from icolos.utils.execute_external.xtb import XTBExecutor
+
+from icolos.utils.general.molecules import get_charge_for_molecule
+
+from icolos.core.containers.compound import Conformer
+
+from icolos.utils.enums.program_parameters import XTBEnum, XTBOutputEnum
+from icolos.core.workflow_steps.step import _LE, _CTE
+from icolos.core.workflow_steps.confgen.base import StepConfgenBase
+from icolos.utils.general.parallelization import SubtaskContainer, Parallelizer
+
+_EE = XTBEnum()
+_COE = XTBOutputEnum()
+
+
+class StepXTB(StepConfgenBase, BaseModel):
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        # initialize the executor and test availability
+        self._initialize_backend(executor=XTBExecutor)
+        self._check_backend_availability()
+
+    def _parse_XTB_result(self, tmp_dirs: List, conformers: List[Conformer]):
+        # XTB will output a variety of files to "dir_path"
+        results = []
+        for dir_path, conformer in zip(tmp_dirs, conformers):
+            optimized_conformer_sdf = os.path.join(dir_path, _COE.XTBOPT_SDF)
+            enum = conformer.get_enumeration_object()
+            # the energy is added by XTB as a tag in the output SDF, but we rename it to our own tag;
+            # note that XTB is called to operate on one conformer at a time (which we will return here)
+            mol_supplier = Chem.SDMolSupplier(optimized_conformer_sdf, removeHs=False)
+            mol = None
+            try:
+                for mol in mol_supplier:
+                    mol.SetProp(
+                        _CTE.CONFORMER_ENERGY_TAG, mol.GetProp(_COE.TOTAL_ENERGY_TAG)
+                    )
+                    mol.ClearProp(_COE.TOTAL_ENERGY_TAG)
+                    mol.SetProp(
+                        _CTE.FORMAL_CHARGE_TAG, str(get_charge_for_molecule(mol))
+                    )
+                    enum.add_conformer(Conformer(conformer=mol), auto_update=True)
+                    results.append(_COE.SUCCESS)
+
+            except Exception:
+                self._logger.log(
+                    f"Failed to parse XTB results for conformer {conformer.get_index_string()}",
+                    _LE.WARNING,
+                )
+                results.append(_COE.FAILURE)
+        return results
+
+    def _prepare_batch(self, batch) -> Tuple:
+        # first position is the input (SDF) file; the internal input at this stage is a list of molecules
+        # -> write it to a temporary SDF file (undocumented input functionality) and add the path
+
+        tmp_dirs = []
+        input_files = []
+        charges = []
+        conformers = []
+        for next_subtask_list in batch:
+            tmp_dir = mkdtemp()
+            tmp_dirs.append(tmp_dir)
+            for (
+                subtask
+            ) in (
+                next_subtask_list
+            ):  # enforced as one since xtb can't handle multiple files in one call
+                conformer = subtask.data
+                conformers.append(conformer)
+                input_file = self._prepare_temp_input(tmp_dir, conformer.get_molecule())
+                charge = get_charge_for_molecule(conformer.get_molecule())
+
+                charges.append(charge)
+                input_files.append(input_file)
+        return tmp_dirs, input_files, charges, conformers
+
+    def _prepare_arguments(self, settings: List) -> List:
+
+        # add flags
+        for flag in self.settings.arguments.flags:
+            settings.append(flag)
+
+        # add parameters
+        parameters = deepcopy(self.settings.arguments.parameters)
+
+        # flatten the dictionary into a list for command-line execution
+        for key in parameters.keys():
+            settings.append(key)
+            settings.append(parameters[key])
+        return settings
+
+    def _run_subjob(self, tmp_dir: str, input_file: str, charge: int) -> None:
+
+        work_dir = os.getcwd()
+        os.chdir(tmp_dir)
+
+        arguments = [input_file, _EE.XTB_P, charge]
+        arguments = self._prepare_arguments(
+            arguments
+        )  # add additional parameters from config
+
+        result = self._backend_executor.execute(
+            command=_EE.XTB, arguments=arguments, check=False
+        )
+        # for line in result.stdout.split("\n"):
+        #     self._logger_blank.log(line, _LE.DEBUG)
+        os.chdir(work_dir)
+
+    def _execute_xtb(self):
+        xtb_parallelizer = Parallelizer(func=self._run_subjob)
+        n = 1
+
+        tmp_dirs = None
+        while self._subtask_container.done() is False:
+
+            next_batch = self._get_sublists(get_first_n_lists=self._get_number_cores())
+            tmp_dirs, input_files, charges, conformers = self._prepare_batch(next_batch)
+
+            _ = [sub.increment_tries() for element in next_batch for sub in element]
+            _ = [sub.set_status_failed() for element in next_batch for sub in element]
+
+            self._logger.log(f"Executing XTB for batch {n}", _LE.DEBUG)
+
+            xtb_parallelizer.execute_parallel(
+                tmp_dir=tmp_dirs,
+                input_file=input_files,
+                charge=charges,
+            )
+
+            results = self._parse_XTB_result(tmp_dirs, conformers)
+            for sublist, result in zip(next_batch, results):
+                assert len(sublist) == 1
+                # TODO: this only works if max length sublist == 1, fine for now as that is all XTB can handle
+                for task in sublist:
+                    if result == _COE.SUCCESS:
+                        task.set_status_success()
+                    else:
+                        task.set_status_failed()
+
+            n += 1
+        self._remove_temporary(tmp_dirs)
+
+    def execute(self):
+        all_conformers = []
+        for compound in self.get_compounds():
+            for enumeration in compound.get_enumerations():
+                if enumeration.get_conformers():
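+                    # each conformer is unrolled into a flat list so the Parallelizer can
+                    # run one XTB process per conformer (max_length_sublists is forced to
+                    # 1 below)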
for conformer in enumeration.get_conformers(): + all_conformers.append(conformer) + enumeration.clear_conformers() + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(all_conformers) + self._execute_xtb() + self._logger.log( + f"Completed execution of XTB for {len(all_conformers)} conformers", + _LE.DEBUG, + ) diff --git a/icolos/core/workflow_steps/gromacs/__init__.py b/icolos/core/workflow_steps/gromacs/__init__.py new file mode 100644 index 0000000..16884f7 --- /dev/null +++ b/icolos/core/workflow_steps/gromacs/__init__.py @@ -0,0 +1,10 @@ +from icolos.core.workflow_steps.gromacs.cluster import StepGMXCluster +from icolos.core.workflow_steps.gromacs.editconf import StepGMXEditConf +from icolos.core.workflow_steps.gromacs.genion import StepGMXGenion +from icolos.core.workflow_steps.gromacs.grompp import StepGMXGrompp +from icolos.core.workflow_steps.gromacs.mdrun import StepGMXMDrun +from icolos.core.workflow_steps.gromacs.pdb2gmx import StepGMXPdb2gmx +from icolos.core.workflow_steps.gromacs.solvate import StepGMXSolvate +from icolos.core.workflow_steps.gromacs.trjconv import StepGMXTrjconv +from icolos.core.workflow_steps.gromacs.clusters_ts import StepClusterTS +from icolos.core.workflow_steps.gromacs.rsmd import StepGMXrmsd diff --git a/icolos/core/workflow_steps/gromacs/base.py b/icolos/core/workflow_steps/gromacs/base.py new file mode 100644 index 0000000..06a49bd --- /dev/null +++ b/icolos/core/workflow_steps/gromacs/base.py @@ -0,0 +1,195 @@ +from icolos.core.containers.generic import GenericData +from icolos.utils.enums.execution_enums import ExecutionResourceEnum +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from pydantic import BaseModel +import os +from typing import List +from icolos.core.workflow_steps.step import StepBase +from icolos.core.workflow_steps.step import _LE +import re +from copy import deepcopy +from distutils.dir_util import copy_tree +from icolos.utils.enums.program_parameters import GromacsEnum +from icolos.utils.execute_external.batch_executor import BatchExecutor +from icolos.utils.execute_external.gromacs import GromacsExecutor + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum +_GE = GromacsEnum() +_ERE = ExecutionResourceEnum + + +class StepGromacsBase(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + def _write_input_files(self, tmp_dir): + + # Normally this should be handled by setting GMXLIB env variable, but for some programs (gmx_MMPBSA), this doesn't work and non-standard forcefields + # need to be in the working directory + if _SGE.FORCEFIELD in self.settings.additional: + copy_tree( + self.settings.additional[_SGE.FORCEFIELD], + os.path.join( + tmp_dir, self.settings.additional[_SGE.FORCEFIELD].split("/")[-1] + ), + ) + self._logger.log( + f"Copied forcefield at {self.settings.additional[_SGE.FORCEFIELD]} to the working " + f"directory at {tmp_dir}", + _LE.INFO, + ) + + self._logger.log( + f"Writing input files to working directory at {tmp_dir}", _LE.DEBUG + ) + for file in self.data.generic.get_flattened_files(): + file.write(tmp_dir) + + def _parse_arguments(self, flag_dict: dict, args: list = None) -> List: + arguments = args if args is not None else [] + # first add the settings from the command line + for key in self.settings.arguments.parameters.keys(): + arguments.append(key) + arguments.append(str(self.settings.arguments.parameters[key])) + 
for flag in self.settings.arguments.flags: + arguments.append(str(flag)) + for key, value in flag_dict.items(): + # only add defaults if they have not been specified in the json + if key not in arguments: + arguments.append(key) + arguments.append(value) + return arguments + + def _copy_fields_dict(self): + try: + update_dictionary = deepcopy(self.settings.additional[_SGE.FIELDS]) + return update_dictionary + except KeyError: + self._logger.log( + "Update dictionary not present, will use provided mdp file without further modification", + _LE.WARNING, + ) + return {} + + def generate_output_file(self, in_file): + parts = in_file.split(".") + return parts[0] + "_out" + "." + parts[1] + + def _modify_config_file( + self, tmp_dir: str, config_file: GenericData, update_dict: dict + ): + file_data = config_file.get_data() + for key, value in update_dict.items(): + pattern = fr"({key})(\s*=\s*)[a-zA-Z0-9\s\_]*(\s*;)" + pattern = re.compile(pattern) + matches = re.findall(pattern, file_data) + if len(matches) == 0: + self._logger.log( + f"Specified key {key} was not found in the mdp file, value was not changed!", + _LE.WARNING, + ) + else: + + file_data = re.sub(pattern, fr"\1\2 {value} \3", file_data) + self._logger.log( + f"Replaced field {key} of mdp file with value {value}", _LE.DEBUG + ) + self._logger.log(f"Final MDP file for step {self.step_id} is: ", _LE.DEBUG) + for line in file_data.split("\n"): + self._logger_blank.log(line, _LE.DEBUG) + config_file.set_data(file_data) + config_file.write(tmp_dir) + + def _generate_index_groups(self, tmp_dir): + try: + + structure = [ + f for f in os.listdir(tmp_dir) if f.endswith(_SGE.FIELD_KEY_STRUCTURE) + ] + assert len(structure) == 1 + structure = structure[0] + except AssertionError: + structure = [ + f for f in os.listdir(tmp_dir) if f.endswith(_SGE.FIELD_KEY_TPR) + ] + structure = structure[0] + + args = ["-f", structure] + ndx_list = [f for f in os.listdir(tmp_dir) if f.endswith(_SGE.FIELD_KEY_NDX)] + if len(ndx_list) == 1: + args.extend(["-n", ndx_list[0]]) + result = self._backend_executor.execute( + command=_GE.MAKE_NDX, + arguments=args, + location=tmp_dir, + check=True, + pipe_input='echo -e "q"', + ) + return result + + def construct_pipe_arguments(self, tmp_dir, params) -> str: + """ + Constructs the pipe arguments to be passed to gromacs interactive programs + """ + # look up the groups that have been passed, try to identify the group number in the corresponding index file + + result = self._generate_index_groups(tmp_dir) + output = ['echo -e "'] + for param in params.split(): + if param == "or": + output.append("|") + elif param == "and": + output.append("&") + elif param == "not": + output.append("!") + elif param == ";": + output.append("\n") + else: + added_one = False + for line in result.stdout.split("\n"): + parts = line.split() + if param in parts and param == parts[1]: + idx = parts[0] + # print("found index", idx, f"for {param}") + added_one = True + output.append(idx) + break + if not added_one: + output.append(param) + output.append('\nq"') + self._logger.log(f"Constructed pipe input {' '.join(output)}", _LE.DEBUG) + return " ".join(output) + + def _add_index_group(self, tmp_dir, pipe_input): + ndx_args_2 = [ + "-f", + self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_STRUCTURE), + "-o", + os.path.join(tmp_dir, _SGE.STD_INDEX), + ] + self._logger.log( + f"Added group to index file using command {pipe_input}", + _LE.DEBUG, + ) + result = self._backend_executor.execute( + command=_GE.MAKE_NDX, + arguments=ndx_args_2, 
+                location=tmp_dir,
+                check=True,
+                pipe_input=self.construct_pipe_arguments(tmp_dir, pipe_input),
+            )
+            for line in result.stdout.split("\n"):
+                self._logger_blank.log(line, _LE.INFO)
+
+    def _get_gromacs_executor(self):
+        # return either the GromacsExecutor or the BatchExecutor, depending on the job's running mode
+
+        if self.execution.resource == _ERE.LOCAL:
+            return GromacsExecutor
+        elif self.execution.resource == _ERE.SLURM:
+            return BatchExecutor
+        else:
+            raise TypeError(
+                f"Execution resource type {self.execution.resource} not recognised",
+            )
diff --git a/icolos/core/workflow_steps/gromacs/cluster.py b/icolos/core/workflow_steps/gromacs/cluster.py
new file mode 100644
index 0000000..0c67453
--- /dev/null
+++ b/icolos/core/workflow_steps/gromacs/cluster.py
@@ -0,0 +1,81 @@
+from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum
+from icolos.utils.enums.program_parameters import GromacsEnum
+from icolos.core.workflow_steps.gromacs.base import StepGromacsBase
+from icolos.utils.execute_external.gromacs import GromacsExecutor
+from pydantic import BaseModel
+from icolos.core.workflow_steps.step import _LE
+import os
+
+_GE = GromacsEnum()
+_SGE = StepGromacsEnum()
+_SBE = StepBaseEnum
+
+
+class StepGMXCluster(StepGromacsBase, BaseModel):
+    """
+    Execute gmx cluster on a trajectory
+    """
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        self._initialize_backend(executor=GromacsExecutor)
+        self._check_backend_availability()
+
+    def execute(self):
+        tmp_dir = self._make_tmpdir()
+        self._write_input_files(tmp_dir)
+
+        # give the option to run a make_ndx step preceding clustering to facilitate clustering on custom groups
+        if _SGE.INDEX_FLAG in self.settings.arguments.parameters.keys():
+            assert (
+                _SGE.STD_INDEX in os.listdir(tmp_dir)
+                or self.settings.additional[_SGE.MAKE_NDX_COMMAND] is not None
+            )
+            if _SGE.STD_INDEX not in os.listdir(tmp_dir):
+                try:
+                    ndx_arguments = [
+                        "-f",
+                        self.data.generic.get_argument_by_extension(
+                            _SGE.FIELD_KEY_STRUCTURE
+                        ),
+                        "-o",
+                        _SGE.STD_INDEX,
+                    ]
+                    result = self._backend_executor.execute(
+                        command=_GE.MAKE_NDX,
+                        arguments=ndx_arguments,
+                        location=tmp_dir,
+                        check=True,
+                        pipe_input=self.construct_pipe_arguments(
+                            tmp_dir, self.settings.additional[_SGE.MAKE_NDX_COMMAND]
+                        ),
+                    )
+
+                except KeyError:
+                    raise KeyError(
+                        "If the index flag was specified, you must provide the ndx command in additional "
+                        "settings"
+                    )
+
+        flag_dict = {
+            "-s": self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TPR),
+            "-f": self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_XTC),
+            "-cl": "clusters.pdb",
+        }
+        arguments = self._parse_arguments(flag_dict=flag_dict)
+
+        result = self._backend_executor.execute(
+            command=_GE.CLUSTER,
+            arguments=arguments,
+            location=tmp_dir,
+            check=True,
+            pipe_input=self.construct_pipe_arguments(
+                tmp_dir, self.settings.additional[_SBE.PIPE_INPUT]
+            ),
+        )
+
+        for line in result.stdout.split("\n"):
+            self._logger_blank.log(line, _LE.DEBUG)
+        self._parse_output(tmp_dir)
+        self._remove_temporary(tmp_dir)
diff --git a/icolos/core/workflow_steps/gromacs/clusters_ts.py b/icolos/core/workflow_steps/gromacs/clusters_ts.py
new file mode 100644
index 0000000..6df4c9f
--- /dev/null
+++ b/icolos/core/workflow_steps/gromacs/clusters_ts.py
@@ -0,0 +1,88 @@
+from icolos.core.workflow_steps.gromacs.base import StepGromacsBase
+from icolos.utils.enums.step_enums import StepGromacsEnum
+from typing import List
+from pydantic import BaseModel
+from icolos.utils.execute_external.execute import Executor
+from icolos.core.workflow_steps.step import _LE
+from icolos.utils.enums.program_parameters import GromacsEnum
+import os
+import sys
+
+_SGE = StepGromacsEnum()
+_GE = GromacsEnum()
+
+
+class StepClusterTS(StepGromacsBase, BaseModel):
+    """
+    Generate time-resolved cluster plots from the output of gmx cluster; relies on the MDplot R package
+    """
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        self._initialize_backend(executor=Executor)
+
+    def _construct_args(self, defaults: dict) -> List:
+        """
+        Custom method for argument construction, includes checks for required args in the config.
+        """
+        args = []
+        for key, value in self.settings.arguments.parameters.items():
+            args.append("".join([key, "=", value]))
+
+        for value in self.settings.arguments.flags:
+            args.append(value)
+        for key, value in defaults.items():
+            if key not in self.settings.arguments.parameters.keys():
+                args.append("".join([key, "=", value]))
+
+        # do some checks to make sure the required params have been passed
+        for arg in [_SGE.CLUSTERS_NUMBER, _SGE.LENGTHS]:
+            if arg not in self.settings.arguments.parameters.keys():
+                self._logger.log(
+                    f"Argument for parameter {arg} not found in the provided arguments. \
+                    This must be specified! If this workflow has attached stdin, \
+                    you can enter the value now...",
+                    _LE.WARNING,
+                )
+                # instead of bailing out, take input from the user if the process has stdin connected
+                if sys.stdin and sys.stdin.isatty():
+                    value = input(f"Provide the parameter for option {arg}>>>")
+                    args.append("".join([arg, "=", value]))
+                else:
+                    self._logger.log(
+                        f"No stdin stream detected, and cannot infer argument, step {self.step_id} may fail",
+                        _LE.WARNING,
+                    )
+        return args
+
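+    # For illustration: with the defaults used in execute() below, the assembled
+    # argument list looks roughly like
+    #   ["files=/tmp/xyz/clust-id.xvg", "size=1500,1500", "outformat=png",
+    #    "outfile=clusters_ts.png", "timeUnit=ns", "title=CLUSTERS_timeseries", ...]
+    # plus any user-supplied key=value parameters and the required clusters/lengths options.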
+    def execute(self):
+        """
+        Visualise time-resolved gmx cluster results.
+        Requires a preceding gmx_cluster step producing a clust-id.xvg file
+        (ensure the -clid flag is set, and that the xvg file is passed to this step)
+        """
+
+        tmp_dir = self._make_tmpdir()
+        self.data.generic.write_out_all_files(tmp_dir)
+        xvg_file = self.data.generic.get_argument_by_extension(ext="xvg")
+
+        arguments = self._construct_args(
+            defaults={
+                "files": os.path.join(tmp_dir, xvg_file),
+                "size": "1500,1500",
+                "outformat": "png",
+                "outfile": "clusters_ts.png",
+                "timeUnit": "ns",
+                "title": "CLUSTERS_timeseries",
+            },
+        )
+
+        self._backend_executor.execute(
+            command=_GE.CLUSTER_TS, arguments=arguments, location=tmp_dir, check=True
+        )
+
+        self._parse_output(tmp_dir)
+        self._remove_temporary(tmp_dir)
diff --git a/icolos/core/workflow_steps/gromacs/do_dssp.py b/icolos/core/workflow_steps/gromacs/do_dssp.py
new file mode 100644
index 0000000..825cd92
--- /dev/null
+++ b/icolos/core/workflow_steps/gromacs/do_dssp.py
@@ -0,0 +1,57 @@
+from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum
+from icolos.utils.enums.program_parameters import GromacsEnum
+from icolos.utils.execute_external.gromacs import GromacsExecutor
+from pydantic import BaseModel
+from icolos.core.workflow_steps.step import _LE
+from icolos.core.workflow_steps.gromacs.base import StepGromacsBase
+
+_GE = GromacsEnum()
+_SGE = StepGromacsEnum()
+_SBE = StepBaseEnum
+
+
+class StepGMXDoDSSP(StepGromacsBase, BaseModel):
+    """
+    Wrapper for the gmx do_dssp binary, which in turn wraps dssp;
+    returns secondary structure data for the provided GROMACS trajectory
+    """
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        self._initialize_backend(executor=GromacsExecutor)
+        self._check_backend_availability()
+
+    def execute(self):
+        tmp_dir = self._make_tmpdir()
+        self._write_input_files(tmp_dir)
+
+        structure_file = self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TPR)
+        traj_file = self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_XTC)
+        arguments = self._parse_arguments(
+            flag_dict={"-f": traj_file, "-s": structure_file, "-ssdump": "info.dat"}
+        )
+
+        pipe_input = (
+            self.construct_pipe_arguments(
+                tmp_dir, self.settings.additional[_SBE.PIPE_INPUT]
+            )
+            if _SBE.PIPE_INPUT in self.settings.additional.keys()
+            and self.settings.additional[_SBE.PIPE_INPUT] is not None
+            else None
+        )
+
+        result = self._backend_executor.execute(
+            command=_GE.DO_DSSP,
+            arguments=arguments,
+            location=tmp_dir,
+            pipe_input=pipe_input,
+        )
+        for line in result.stdout.split("\n"):
+            self._logger_blank.log(line, _LE.DEBUG)
+        self._logger.log(
+            f"Completed execution for {self.step_id} successfully", _LE.INFO
+        )
+
+        self._parse_output(tmp_dir)
+        self._remove_temporary(tmp_dir)
diff --git a/icolos/core/workflow_steps/gromacs/editconf.py b/icolos/core/workflow_steps/gromacs/editconf.py
new file mode 100644
index 0000000..c522c13
--- /dev/null
+++ b/icolos/core/workflow_steps/gromacs/editconf.py
@@ -0,0 +1,57 @@
+from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum
+from icolos.utils.enums.program_parameters import GromacsEnum
+from icolos.utils.execute_external.gromacs import GromacsExecutor
+from pydantic import BaseModel
+from icolos.core.workflow_steps.step import _LE
+from icolos.core.workflow_steps.gromacs.base import StepGromacsBase
+
+_GE = GromacsEnum()
+_SGE = StepGromacsEnum()
+_SBE = StepBaseEnum
+
+
+class StepGMXEditConf(StepGromacsBase, BaseModel):
+    """
+    Wrapper for gmx editconf
+    """
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
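+        # The pipe_input mechanism (construct_pipe_arguments in StepGromacsBase) turns a
+        # readable group expression into the interactive input gmx expects, e.g.
+        #   "Protein or Other"  ->  'echo -e "1 | 13 \nq"'
+        # (group numbers depend on the generated index file, so this is purely illustrative)
+        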
self._initialize_backend(executor=GromacsExecutor) + self._check_backend_availability() + + def execute(self): + tmp_dir = self._make_tmpdir() + self._write_input_files(tmp_dir) + + structure_file = self.data.generic.get_argument_by_extension( + _SGE.FIELD_KEY_STRUCTURE + ) + arguments = self._parse_arguments( + flag_dict={"-f": structure_file, "-o": structure_file} + ) + + pipe_input = ( + self.construct_pipe_arguments( + tmp_dir, self.settings.additional[_SBE.PIPE_INPUT] + ) + if _SBE.PIPE_INPUT in self.settings.additional.keys() + and self.settings.additional[_SBE.PIPE_INPUT] is not None + else None + ) + + result = self._backend_executor.execute( + command=_GE.EDITCONF, + arguments=arguments, + location=tmp_dir, + pipe_input=pipe_input, + ) + for line in result.stdout.split("\n"): + self._logger_blank.log(line, _LE.DEBUG) + self._logger.log( + f"Completed execution for {self.step_id} successfully", _LE.INFO + ) + + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/gromacs/genion.py b/icolos/core/workflow_steps/gromacs/genion.py new file mode 100644 index 0000000..688e64a --- /dev/null +++ b/icolos/core/workflow_steps/gromacs/genion.py @@ -0,0 +1,69 @@ +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from icolos.utils.enums.program_parameters import GromacsEnum +from icolos.core.workflow_steps.gromacs.base import StepGromacsBase +from icolos.utils.execute_external.gromacs import GromacsExecutor +from pydantic import BaseModel +from icolos.core.workflow_steps.step import _LE +import os + +_GE = GromacsEnum() +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class StepGMXGenion(StepGromacsBase, BaseModel): + """ + Wrapper for gmx genion + """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=GromacsExecutor) + self._check_backend_availability() + + def execute(self): + tmp_dir = self._make_tmpdir() + self._write_input_files(tmp_dir) + arguments = self._parse_arguments( + { + # input file paths are handled internally + "-o": _SGE.STD_STRUCTURE, + "-p": self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TOPOL), + "-s": self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TPR), + } + ) + result = self._backend_executor.execute( + command=_GE.GENION, + arguments=arguments, + location=tmp_dir, + pipe_input=self.construct_pipe_arguments( + tmp_dir, self.settings.additional[_SBE.PIPE_INPUT] + ), + ) + for line in result.stdout.split("\n"): + self._logger_blank.log(line, _LE.DEBUG) + self._logger.log( + f"Completed execution for {self.step_id} successfully", _LE.INFO + ) + # this is the last structural change to the topology in a regular gromacs setup, + # update the index groups here + make_ndx_args = ["-f", _SGE.STD_STRUCTURE, "-o", _SGE.STD_INDEX] + index_files = [f for f in os.listdir(tmp_dir) if f.endswith(".ndx")] + # remove any existing index files + for f in index_files: + self._remove_temporary(os.path.join(tmp_dir, f)) + # generate new index file + result = self._backend_executor.execute( + command=_GE.MAKE_NDX, + arguments=make_ndx_args, + location=tmp_dir, + check=True, + pipe_input='echo -e "1 | 12 \nq"', + ) + for line in result.stdout.split("\n"): + self._logger_blank.log(line, _LE.DEBUG) + + self._logger.log('Added index group to "index.ndx"', _LE.DEBUG) + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/gromacs/grompp.py b/icolos/core/workflow_steps/gromacs/grompp.py new file mode 100644 
index 0000000..1e5508d
--- /dev/null
+++ b/icolos/core/workflow_steps/gromacs/grompp.py
@@ -0,0 +1,125 @@
+from icolos.utils.enums.step_enums import StepGromacsEnum
+from icolos.utils.enums.program_parameters import GromacsEnum
+from icolos.core.workflow_steps.gromacs.base import StepGromacsBase
+from icolos.utils.execute_external.gromacs import GromacsExecutor
+from pydantic import BaseModel
+from icolos.core.workflow_steps.step import _LE
+
+_GE = GromacsEnum()
+_SGE = StepGromacsEnum()
+
+
+class StepGMXGrompp(StepGromacsBase, BaseModel):
+    """
+    Wraps the gromacs preprocessor, produces the tpr file preceding the mdrun step
+    Automatically handles coupling group updates
+    """
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        self._initialize_backend(executor=GromacsExecutor)
+        self._check_backend_availability()
+
+    def _auto_update_coupling_groups(self, tmp_dir):
+        # this will handle most straightforward cases with protein+ligand, DNA, RNA
+        result = self._generate_index_groups(tmp_dir)
+        add_other = False
+        add_ions = False
+
+        # check whether the ions and other index groups are present
+        for line in result.stdout.split("\n"):
+            parts = line.split()
+
+            if len(parts) == 5:
+                if parts[1] in _GE.PRIMARY_COMPONENTS:
+                    primary_component = parts[1]
+                    # identify Protein, DNA, RNA
+                elif parts[1] == "Other":
+                    add_other = True
+                elif parts[1] == _SGE.WATER_AND_IONS:
+                    add_ions = True
+
+        update_dict = self._copy_fields_dict()
+        pipe_input = ""
+        tc_grps = ""
+        if add_other:
+            pipe_input += f"{primary_component} or Other"
+            tc_grps += f"{primary_component}_Other"
+        else:
+            tc_grps += primary_component
+        if add_ions:
+            tc_grps += " "
+            tc_grps += _SGE.WATER_AND_IONS
+        else:
+            tc_grps += " "
+            tc_grps += "Water"
+
+        update_dict[_SGE.TC_GRPS] = tc_grps
+
+        if pipe_input:
+            self._add_index_group(tmp_dir, pipe_input)
+
+        # update the mdp file with the modified coupling groups
+        self._modify_config_file(
+            tmp_dir,
+            self.data.generic.get_argument_by_extension(
+                _SGE.FIELD_KEY_MDP, rtn_file_object=True
+            ),
+            update_dict,
+        )
+
+    def execute(self):
+        """
+        Set up the required mdp file and run gmx grompp
+        Note that any issues with your parametrisation or system building will normally cause grompp to panic
+        """
+        tmp_dir = self._make_tmpdir()
+        self._write_input_files(tmp_dir)
+
+        # if a make_ndx command has been specified in settings.additional,
+        # add an index group here, commonly protein_ligand or protein_other
+        if (
+            _SGE.MAKE_NDX_COMMAND in self.settings.additional.keys()
+            and self.settings.additional[_SGE.MAKE_NDX_COMMAND] is not None
+        ):
+            # normally you want your two t-coupling groups to be something like Protein_Other Water_Ions;
+            # these can be added automatically with the "auto" keyword
+            if self.settings.additional[_SGE.MAKE_NDX_COMMAND] == _SGE.AUTO:
+                # automatically update the coupling groups: check for the presence of 'ions' and 'other',
+                # then update the default coupling groups in the mdp file
+                self._auto_update_coupling_groups(tmp_dir)
+            else:
+                # the mdp file will not be modified; coupling groups must be set correctly prior to job execution
+                self._add_index_group(
+                    tmp_dir, self.settings.additional[_SGE.MAKE_NDX_COMMAND]
+                )
+
+        structure_file = self.data.generic.get_argument_by_extension(
+            _SGE.FIELD_KEY_STRUCTURE
+        )
+        mdp_file = self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_MDP)
+        topol_file = self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TOPOL)
+
+        args = ["-r", structure_file] if self.settings.additional.get("-r") else []
+
+        arguments = self._parse_arguments(
+ flag_dict={ + "-f": mdp_file, + "-c": structure_file, + "-p": topol_file, + "-o": _SGE.STD_TPR, + }, + args=args, + ) + result = self._backend_executor.execute( + command=_GE.GROMPP, arguments=arguments, check=True, location=tmp_dir + ) + for line in result.stdout.split("\n"): + self._logger_blank.log(line, _LE.DEBUG) + self._logger.log( + f"Completed execution for {self.step_id} successfully", _LE.INFO + ) + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/gromacs/mdrun.py b/icolos/core/workflow_steps/gromacs/mdrun.py new file mode 100644 index 0000000..0739fd8 --- /dev/null +++ b/icolos/core/workflow_steps/gromacs/mdrun.py @@ -0,0 +1,67 @@ +from icolos.utils.enums.step_enums import StepGromacsEnum +from icolos.utils.enums.program_parameters import GromacsEnum +from icolos.core.workflow_steps.gromacs.base import StepGromacsBase +from pydantic import BaseModel +from icolos.core.workflow_steps.step import _LE +import os + +_GE = GromacsEnum() +_SGE = StepGromacsEnum() + + +class StepGMXMDrun(StepGromacsBase, BaseModel): + """ + Launch gmx mdrun + """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=self._get_gromacs_executor()) + self._check_backend_availability() + + def _get_log_file(self, tmp_dir): + """ + Find and parse the log file + """ + log_file = [f for f in os.listdir(tmp_dir) if f.endswith(".log")] + assert len(log_file) == 1 + with open(os.path.join(tmp_dir, log_file[0]), "r") as f: + data = f.readlines() + return data + + def _tail_log_file(self, tmp_dir): + """ + Log the last 50 lines of the log file to capture performance metrics from the run + + """ + log_file = self._get_log_file(tmp_dir) + + for line in log_file[-50:]: + self._logger_blank.log(line, _LE.INFO) + + def execute(self): + + tmp_dir = self._make_tmpdir() + # if we're simulating a protein, we need to modify the topol file to include the correct index groups \ + # to allow ligand restraint. 
This means an ndx file must be specified in the json + self._write_input_files(tmp_dir) + # append _out to the xtc file name + xtc_output_file = self.generate_output_file(_SGE.STD_XTC) + arguments = self._parse_arguments( + flag_dict={ + "-s": self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TPR), + "-c": _SGE.STD_STRUCTURE, + "-x": xtc_output_file, + } + ) + self._backend_executor.execute( + command=_GE.MDRUN, arguments=arguments, location=tmp_dir, check=True + ) + + self._tail_log_file(tmp_dir) + self._logger.log( + f"Completed execution for {self.step_id} successfully", _LE.INFO + ) + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/gromacs/mmpbsa.py b/icolos/core/workflow_steps/gromacs/mmpbsa.py new file mode 100644 index 0000000..c1d97ab --- /dev/null +++ b/icolos/core/workflow_steps/gromacs/mmpbsa.py @@ -0,0 +1,161 @@ +from subprocess import CompletedProcess +from icolos.core.containers.generic import GenericData +from typing import AnyStr, List +from icolos.core.workflow_steps.gromacs.base import StepGromacsBase +from pydantic import BaseModel +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from icolos.core.workflow_steps.step import _LE +from icolos.utils.enums.program_parameters import GromacsEnum +import os +from icolos.utils.general.files_paths import attach_root_path + +_SGE = StepGromacsEnum() +_GE = GromacsEnum() +_SBE = StepBaseEnum + + +class StepGMXmmpbsa(StepGromacsBase, BaseModel): + """ + Execute gmx_MMPBSA, calculates binding free energy of + protein-ligand complex using single trajectory approximation, + using Amber's mmpbsa.py script + """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(self._get_gromacs_executor()) + self._check_backend_availability() + + def _get_arg(self, ext) -> AnyStr: + return self.data.generic.get_argument_by_extension(ext) + + def _generate_amber_input_file(self) -> None: + input_file = ( + self.settings.additional[_SGE.INPUT_FILE] + if _SGE.INPUT_FILE in self.settings.additional.keys() + else None + ) + # Normally the user should provide an input file to control the mmgbsa protocol + if input_file is not None and os.path.isfile(input_file): + self._logger.log( + f"Using provided AMBER input file at {self.settings.additional[_SGE.INPUT_FILE]}", + _LE.DEBUG, + ) + with open(input_file, "r") as f: + template = GenericData(file_name="mmpbsa.in", file_data=f.read()) + else: + self._logger.log("No input file found, defaulting to template", _LE.WARNING) + # parses user arguments and creates the formatted amber input file from the user specification + with open( + attach_root_path("icolos/config/amber/default_mmpbsa.in"), "r" + ) as f: + template = GenericData(file_name="mmpbsa.in", file_data=f.read()) + + self.data.generic.add_file(template) + + def _parse_arguments(self, flag_dict: dict) -> List: + args = [] + for flag in self.settings.arguments.flags: + if flag != "-O": + args.append(flag) + for key, value in self.settings.arguments.parameters.items(): + args.append(key) + args.append(value) + for key, value in flag_dict.items(): + if key not in args: + args.append(key) + args.append(value) + + # capture output + return args + + def _run_mmpbsa(self, args, tmp_dir) -> CompletedProcess: + command = _GE.MMPBSA + self._logger.log(f"Executing mmgbsa calculation in dir {tmp_dir}", _LE.DEBUG) + result = self._backend_executor.execute( + command=command, arguments=args, check=True, location=tmp_dir + ) + for line in 
result.stdout.split("\n"): + self._logger_blank.log(line, _LE.INFO) + for line in result.stderr.split("\n"): + self._logger_blank.log(line, _LE.INFO) + + return result + + def _parse_coupling_groups(self, tmp_dir) -> AnyStr: + # parse the coupling groups to their indexes + output = [] + pipe_input = self.settings.additional[_SGE.COUPLING_GROUPS] + + structure = self.data.generic.get_argument_by_extension( + _SGE.FIELD_KEY_STRUCTURE + ) + arguments = ["-f", structure] + if [f for f in os.listdir(tmp_dir) if f.endswith("ndx")]: + arguments.extend(["-n", "index.ndx"]) + else: + arguments.extend(["-o", "index.ndx"]) + + result = self._backend_executor.execute( + command=_GE.MAKE_NDX, + arguments=arguments, + location=tmp_dir, + check=True, + pipe_input='echo -e "q"', + ) + for param in pipe_input.split(): + for line in result.stdout.split("\n"): + parts = line.split() + if param in line and parts[1] == param: + output.append(parts[0]) + break + self._logger.log(f"Resolved coupling groups {output}", _LE.DEBUG) + return " ".join(output) + + def _get_file_from_dir(self, tmp_dir: str, ext: str) -> AnyStr: + file = [f for f in os.listdir(tmp_dir) if f.endswith(ext)] + assert len(file) == 1 + return file[0] + + def execute(self) -> None: + """ + Execute gmx_MMPBSA + Note: execution using mpirun is not supported for stability reasons + """ + tmp_dir = self._make_tmpdir() + + self._generate_amber_input_file() + self._write_input_files(tmp_dir) + + # gmx_MMPBSA requires the coupling groups of the receptor and ligand + + # form any required coupling groups with make_ndx_command before parsing coupling groups + # e.g. combine protein + cofactor + ndx_commands = ( + self.settings.additional[_SGE.MAKE_NDX_COMMAND] + if _SGE.MAKE_NDX_COMMAND in self.settings.additional.keys() + else None + ) + if ndx_commands is not None: + # can run make_ndx multiple times for complex cases, each set of pipe imput must be separated by a semicolon + for args in ndx_commands.split(";"): + self._add_index_group(tmp_dir=tmp_dir, pipe_input=args) + flag_dict = { + "-i": _SGE.MMPBSA_IN, + "-cs": self._get_arg("tpr"), + "-cg": self._parse_coupling_groups(tmp_dir), + "-ci": self._get_file_from_dir(tmp_dir=tmp_dir, ext="ndx"), + "-ct": self._get_arg("xtc"), + "-cp": self._get_arg("top"), + # do not attempt to open the results in the GUI afterwards + "-nogui": "", + } + + flag_list = self._parse_arguments(flag_dict=flag_dict) + + result = self._run_mmpbsa(flag_list, tmp_dir) + + # parse and delete generated output + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/gromacs/pdb2gmx.py b/icolos/core/workflow_steps/gromacs/pdb2gmx.py new file mode 100644 index 0000000..bc8a33e --- /dev/null +++ b/icolos/core/workflow_steps/gromacs/pdb2gmx.py @@ -0,0 +1,455 @@ +from icolos.utils.enums.program_parameters import ( + GromacsEnum, +) +from icolos.utils.enums.step_enums import StepGromacsEnum +from pydantic import BaseModel +from icolos.core.workflow_steps.gromacs.base import StepGromacsBase +from icolos.utils.execute_external.gromacs import GromacsExecutor +from icolos.utils.execute_external.execute import Executor +from icolos.utils.execute_external.schrodinger import SchrodingerExecutor +from icolos.core.workflow_steps.step import _LE +import os +import re +from typing import AnyStr, List +from string import ascii_uppercase + +_SGE = StepGromacsEnum() +_GE = GromacsEnum() + + +class StepGMXPdb2gmx(StepGromacsBase, BaseModel): + _shell_executor: Executor = None + _antechamber_executor: 
Executor = None
+    _acpype_executor: Executor = None
+    _schrodinger_executor: SchrodingerExecutor = None
+
+    def __init__(self, **data):
+        """
+        Executes system parametrisation for gromacs MD setup
+        Generates GAFF params for unknown components with Antechamber
+        """
+        super().__init__(**data)
+
+        self._initialize_backend(executor=GromacsExecutor)
+        self._check_backend_availability()
+        self._shell_executor = Executor()
+        self._antechamber_executor = Executor(prefix_execution=_SGE.AMBERTOOLS_LOAD)
+
+    def _modify_topol_file(self, tmp_dir, itp_files):
+        # read in the complex topol file, add the new itp files after the forcefield #include statement
+        with open(os.path.join(tmp_dir, _SGE.COMPLEX_TOP), "r") as f:
+            lines = f.readlines()
+        index = [idx for idx, s in enumerate(lines) if _SGE.FORCEFIELD_ITP in s][0]
+        new_topol = lines[: index + 1]
+        for file in itp_files:
+            new_topol.append(f'#include "{file}"\n')
+        for line in lines[index + 1 :]:
+            new_topol.append(line)
+        for file in itp_files:
+            stub = file.split(".")[0]
+            new_topol.append(f"{stub} 1\n")
+        with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "w") as f:
+            f.writelines(new_topol)
+
+        # remove all but the final topol file from the paths, makes file handling cleaner later
+        top_files = [
+            f for f in os.listdir(tmp_dir) if f.endswith("top") and f != _SGE.STD_TOPOL
+        ]
+
+        for f in top_files:
+            os.remove(os.path.join(tmp_dir, f))
+
+    def _add_posre_to_topol(self, tmp_dir, lig):
+        """
+        Add lines to the topol file to invoke positional restraints for the parametrised ligands
+        """
+        stub = lig.split(".")[0]
+        lig_itp = stub + ".itp"
+        with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "r") as f:
+            lines = f.readlines()
+        index = [idx for idx, s in enumerate(lines) if lig_itp in s][0]
+        new_topol = lines[: index + 1]
+        # the included file name must be quoted for the gromacs preprocessor
+        new_topol.append(
+            f'#ifdef POSRES_{stub.upper()}\n#include "posre_{stub}.itp"\n#endif\n'
+        )
+        for line in lines[index + 1 :]:
+            new_topol.append(line)
+
+        with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "w") as f:
+            f.writelines(new_topol)
+
+    def _split_protein_ligand_complex(self, tmp_dir):
+        # split the file into protein and an arbitrary number of ligands and cofactors;
+        # handles multiple cofactors of the same type
+        struct_file = [
+            file for file in os.listdir(tmp_dir) if file.endswith(_SGE.FIELD_KEY_PDB)
+        ][0]
+        with open(os.path.join(tmp_dir, struct_file), "r") as f:
+            data = f.readlines()
+        # handles an arbitrary number of ligands, cofactors, etc.
+        ligand_lines = {}
+        protein_lines = []
+
+        for line in data:
+            parts = line.upper().split()
+
+            # filter header lines etc.
+            if len(parts) > 4 and parts[0] in _GE.ATOMS:
+
+                # catch the easy cases where there is a direct match against the internal dict of parametrised components
+                if (
+                    parts[3] in _GE.AMBER_PARAMETRISED_COMPONENTS
+                    or parts[3] in _GE.IONS
+                ):
+
+                    protein_lines.append(line)
+
+                # catch cases where ions have non-standard residue names e.g.
NA3 + elif parts[3][:2] in _GE.IONS and re.findall( + re.compile(fr"{parts[3][:2]}[0-9]+"), line + ): + + pattern = fr"{parts[3][:2]}[0-9]+" + pattern = re.compile(pattern) + + line = re.sub(pattern, parts[3][:2], line) + protein_lines.append(line) + + else: + # component is not parametrised, add to the ligands + if parts[4] in list(ascii_uppercase): + try: + ligand_lines[f"{parts[3]}:{parts[5]}"].append(line) + except KeyError: + # ligand key not created yet, identify by chain + res num to handle multiple identical components + ligand_lines[f"{parts[3]}:{parts[5]}"] = [line] + else: # the 5th col index is the first coord col + try: + ligand_lines[f"{parts[3]}:{parts[4]}"].append(line) + except KeyError: + ligand_lines[f"{parts[3]}:{parts[4]}"] = [line] + + for key, value in ligand_lines.items(): + # write ligand components as separate pdb files + with open(os.path.join(tmp_dir, f"{key}.pdb"), "w") as f: + f.writelines(value) + with open(os.path.join(tmp_dir, _SGE.PROTEIN_PDB), "w") as f: + f.writelines(protein_lines) + self._remove_temporary(os.path.join(tmp_dir, struct_file)) + return list(ligand_lines.keys()) + + def _parametrisation_pipeline(self, tmp_dir, input_pdb) -> None: + """ + :param tmp_dir: step's base directory + :param input_pdb: file name for the ligand being parametrised + """ + # main pipeline for producing GAFF parameters for a ligand + stub = input_pdb.split(".")[0] + output_file = stub + ".mol2" + arguments_antechamber = [ + "-i", + input_pdb, + "-o", + output_file, + "-fi", + "pdb", + "-fo", + "mol2", + "-c", + "gas", + ] + self._logger.log(f"Running antechamber on structure {input_pdb}", _LE.DEBUG) + self._antechamber_executor.execute( + command=_GE.ANTECHAMBER, + arguments=arguments_antechamber, + check=True, + location=tmp_dir, + ) + + # Step 4: run the acpype script to generate the ligand topology file for GAFF + self._logger.log(f"Running acpype on structure {input_pdb}", _LE.DEBUG) + arguments_acpype = [ + os.path.join(_GE.ACPYPE_PATH, _GE.ACPYPE_BINARY), + "-di", + output_file, + "-c", + "gas", + ] + self._antechamber_executor.execute( + command=_GE.PYTHON, arguments=arguments_acpype, location=tmp_dir, check=True + ) + # produce the ndx file for genrestr later + index_file = stub + ".ndx" + ndx_arguments = ["-f", input_pdb, "-o", index_file] + + self._backend_executor.execute( + command=_GE.MAKE_NDX, + arguments=ndx_arguments, + location=tmp_dir, + check=True, + pipe_input='echo -e "0 & ! 
a H* \nq"', # all system heavy atoms, excl hydrogens + ) + # generate positional restraints for the ligand + genrestr_args = [ + "-f", + input_pdb, + "-n", + index_file, + "-o", + f"posre_{stub}.itp", + "-fc", + _SGE.FORCE_CONSTANTS, + ] + self._backend_executor.execute( + command=_GE.GENRESTR, + arguments=genrestr_args, + location=tmp_dir, + check=True, + pipe_input="echo 3", + ) # this will always be the last thing on the index file + + # we no longer need the ligand ndx file + self._remove_temporary(os.path.join(tmp_dir, index_file)) + + def _sort_components(self, lig_ids: List, components: List): + """ + Ensure components go back into the concatenated pdb file in the same order as the original + """ + new_components = [] + for idx in lig_ids: + for component in components: + if idx in component: + new_components.append(component) + return new_components + + def _concatenate_structures(self, tmp_dir: str, lig_ids: List): + """ + Extract newly parametrised components, concatenate everything into a single pdb file + """ + + components = [] + for root, _, files in os.walk(tmp_dir): + for file in files: + if file.endswith("_NEW.pdb"): + components.append(os.path.join(root, file)) + components = self._sort_components(lig_ids, components) + self._logger.log(f"Found components: {components}", _LE.DEBUG) + with open(os.path.join(tmp_dir, _SGE.PROTEIN_PDB), "r") as f: + pdb_lines = f.readlines() + + for file in components: + with open(file, "r") as f: + + pdb_lines.extend(f.readlines()) + + pdb_lines = [ + l for l in pdb_lines if not any(s in l for s in ["TER", "ENDMDL", "REMARK"]) + ] + pdb_lines.extend(["TER\n", "ENDMDL\n"]) + with open(os.path.join(tmp_dir, "Complex.pdb"), "w") as f: + f.writelines(pdb_lines) + + # also deal with renaming the itp files here + for root, _, files in os.walk(tmp_dir): + for item in files: + if ( + item.endswith("GMX.itp") + and _SGE.PROTEIN_TOP not in item + and os.path.join(root, item) != os.path.join(tmp_dir, item) + ): + os.rename( + os.path.join(root, item), + os.path.join(tmp_dir, item.split("_")[0]) + ".itp", + ) + # rename the protein top to complex + os.rename( + os.path.join(tmp_dir, _SGE.PROTEIN_TOP), + os.path.join(tmp_dir, _SGE.COMPLEX_TOP), + ) + + def _extract_atomtype(self, tmp_dir: str, file: str) -> List[AnyStr]: + """ + Pull the atomtype lines out of the topol file and return them as a list, write the sanitised itp file to directory + """ + with open(os.path.join(tmp_dir, file), "r") as f: + lines = f.readlines() + start_index = None + stop_index = None + for idx, line in enumerate(lines): + if _GE.ATOMTYPES in line: + start_index = idx + if _GE.MOLECULETYPES in line: + stop_index = idx + + selection = lines[start_index:stop_index] + # remove the offending lines from the topol + remaining = lines[:start_index] + remaining.extend(lines[stop_index:]) + self._remove_temporary(os.path.join(tmp_dir, file)) + with open(os.path.join(tmp_dir, file), "w") as f: + f.writelines(remaining) + return selection + + def _remove_duplicate_atomtypes(self, atomtypes: List): + output = [atomtypes[0]] + for line in atomtypes: + if line not in output: + output.append(line) + return output + + def _modify_itp_files(self, tmp_dir): + # cut the moleculetype directives out of all the individual itp files and add them to the top of the topol + atomtype_lines = [] + # read the topol file, identify all the itp files it is #including + with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "r") as f: + topol_lines = [ + l.split()[-1].strip('"') + for l in f.readlines() + if 
".itp" in l and "posre" not in l + ] + topol_lines = [l for l in topol_lines if l in os.listdir(tmp_dir)] + for file in topol_lines: + atomtype_lines.extend(self._extract_atomtype(tmp_dir, file)) + atomtype_lines = self._remove_duplicate_atomtypes(atomtype_lines) + + # write an 'atomtypes.itp' files to be included just below the forcefield, with all the atomtypes contained in the extra components + with open(os.path.join(tmp_dir, "atomtypes.itp"), "w") as f: + f.writelines(atomtype_lines) + + with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "r") as f: + lines = f.readlines() + self._remove_temporary(os.path.join(tmp_dir, _SGE.STD_TOPOL)) + index = [idx for idx, s in enumerate(lines) if _SGE.FORCEFIELD_ITP in s][0] + new_topol = lines[: index + 1] + + new_topol.append('#include "atomtypes.itp"\n') + new_topol.extend(lines[index + 1 :]) + with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "w") as f: + f.writelines(new_topol) + + def _modify_water_molecules(self, tmp_dir: str): + with open(os.path.join(tmp_dir, _SGE.COMPLEX_PDB), "r") as f: + lines = f.readlines() + + solvent = [] + # pick out the water lines + for line in lines: + if any([x in line for x in _GE.SOLVENTS]): + solvent.append(line) + for line in solvent: + lines.remove(line) + lines.extend(solvent) + for line in lines: + if any([x in line for x in _GE.TERMINATIONS]): + lines.remove(line) + + with open(os.path.join(tmp_dir, _SGE.COMPLEX_PDB), "w") as f: + f.writelines(lines) + + if solvent: + # modify the topol to put the solvent in last in the [ molecules ] directive + with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "r") as f: + lines = f.readlines() + molecule_idx = lines.index(_GE.MOLECULES) + for line in lines[molecule_idx:]: + if any([x in line for x in _GE.SOLVENTS]): + out = lines.pop(lines.index(line)) + lines.append(out) + with open(os.path.join(tmp_dir, _SGE.STD_TOPOL), "w") as f: + f.writelines(lines) + + def execute(self): + """Takes in a ligand pdb file and generates the required topology, based on the backend specified in the config json. 
+ Currently supported AnteChamber + + Execution looks like this currently: + (1) split the protein from the other components + (2) generate the topology for the protein separately + (3) identify components to be parametrised (cofactors, ligands etc) + (4) run the parametrisation pipeline on each component in serial (reasonably fast exec time per ligand) + (4a) store the resIDs of the ligands using the file handling system to be retrieved in a later step + (5) modify the topology file to add the #include statements for the relevant itp files + (6) convert the resulting concatenated pdb file to .gro with editconf + (7) add the posres stuff to the topol file for each ligand for the subsequent equilibration steps + (8) if more than one ligand, modify the itp files to ensure all moleculetype directives are specified first + """ + + tmp_dir = self._make_tmpdir() + self._write_input_files(tmp_dir) # dump generic data fields to the tmpdir + lig_ids = self._split_protein_ligand_complex(tmp_dir) + self._logger.log( + f"Parameters will be generated for the following components: {str(lig_ids)}", + _LE.DEBUG, + ) + + # Step 2: run pdb2gmx on the protein component only + + arguments_pdb2gmx = self._parse_arguments( + flag_dict={ + "-f": os.path.join(tmp_dir, _SGE.PROTEIN_PDB), + "-o": os.path.join(tmp_dir, _SGE.PROTEIN_PDB), + "-p": _SGE.PROTEIN_TOP, + } + ) + self._backend_executor.execute( + command=_GE.PDB2GMX, arguments=arguments_pdb2gmx, location=tmp_dir + ) + + for lig in lig_ids: + input_file = lig + ".pdb" + # generate the itp files for each component, named by their PDB identifier + self._parametrisation_pipeline(tmp_dir, input_file) + + # concatenate the structures to produce Complex.pdb + if lig_ids: + self._concatenate_structures(tmp_dir, lig_ids) + # step 6: Modify protein topol file for ligand + itp_files = [ + f + for f in os.listdir(tmp_dir) + if f.endswith(".itp") + and "posre" not in f + and not any( + [x in f for x in _GE.PRIMARY_COMPONENTS] + ) # avoid any duplicated itp file entries from components of the protein already handles by pdb2gmx (TODO: makes sure this works for DNA/RNA as well) + ] + # need to sort the itp files to match the ordering from the original pdb structure + itp_files = self._sort_components(lig_ids, itp_files) + self._modify_topol_file(tmp_dir, itp_files) + + # step 10: modify the topol file to add the ligand posre file if restraints are applied + for lig in lig_ids: + self._add_posre_to_topol(tmp_dir, lig) + + # if more than two ligands present, modify the ligand itp files so all the [atomtype] directives come before the [moleculetype] directives in the full topol + if len(lig_ids) > 1: + self._modify_itp_files(tmp_dir) + + else: + # just convert the file names in place, no addition of ligands + os.rename( + os.path.join(tmp_dir, _SGE.PROTEIN_TOP), + os.path.join(tmp_dir, _SGE.STD_TOPOL), + ) + os.rename( + os.path.join(tmp_dir, _SGE.PROTEIN_PDB), + os.path.join(tmp_dir, _SGE.COMPLEX_PDB), + ) + + # step 7: run editconf to convert the combined pdb to a gro file + + # do final check to move crystallographic waters to the end of the pdb file, after + # the ligand, to ensure continuous solvent group later + self._modify_water_molecules(tmp_dir) + # and adjust the topol file to put any solvent last + + editconf_arguments = ["-f", _SGE.COMPLEX_PDB, "-o", "structure.gro"] + self._backend_executor.execute( + command=_GE.EDITCONF, + arguments=editconf_arguments, + location=tmp_dir, + check=True, + ) + + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff 
--git a/icolos/core/workflow_steps/gromacs/rsmd.py b/icolos/core/workflow_steps/gromacs/rsmd.py
new file mode 100644
index 0000000..5ad224f
--- /dev/null
+++ b/icolos/core/workflow_steps/gromacs/rsmd.py
@@ -0,0 +1,59 @@
+from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum
+from icolos.utils.enums.program_parameters import GromacsEnum
+from icolos.core.workflow_steps.gromacs.base import StepGromacsBase
+from icolos.utils.execute_external.gromacs import GromacsExecutor
+from pydantic import BaseModel
+from icolos.core.workflow_steps.step import _LE
+import os
+
+_GE = GromacsEnum()
+_SGE = StepGromacsEnum()
+_SBE = StepBaseEnum
+
+
+class StepGMXrmsd(StepGromacsBase, BaseModel):
+    """
+    Run a gromacs rmsd calculation on a trajectory
+    """
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        self._initialize_backend(executor=GromacsExecutor)
+        self._check_backend_availability()
+
+    def execute(self):
+
+        tmp_dir = self._make_tmpdir()
+
+        # we receive a trajectory from a trjconv step, and a single structure to
+        # compare against; rmsd is computed for every frame of the trj file
+
+        # write out generic files
+        self._write_input_files(tmp_dir)
+
+        # conformer coming from a Compound object
+        conf = self._unroll_compounds(self.data.compounds)
+
+        conf = conf[0]
+        conf.write(os.path.join(tmp_dir, "reference.pdb"), format_="pdb")
+
+        flag_dict = {
+            "-s": "reference.pdb",
+            "-f": self.data.generic.get_argument_by_extension("xtc"),
+            "-fit": "rot+trans",
+        }
+
+        arguments = self._parse_arguments(flag_dict=flag_dict, args=["-w"])
+        result = self._backend_executor.execute(
+            command=_GE.RMS,
+            arguments=arguments,
+            location=tmp_dir,
+            check=True,
+            pipe_input='echo -e "2\n2\n"',
+        )
+        for line in result.stdout.split("\n"):
+            self._logger_blank.log(line, _LE.DEBUG)
+
+        self._parse_output(tmp_dir)
+        self._remove_temporary(tmp_dir)
diff --git a/icolos/core/workflow_steps/gromacs/solvate.py b/icolos/core/workflow_steps/gromacs/solvate.py
new file mode 100644
index 0000000..1f30d2f
--- /dev/null
+++ b/icolos/core/workflow_steps/gromacs/solvate.py
@@ -0,0 +1,46 @@
+from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum
+from icolos.utils.enums.program_parameters import GromacsEnum
+from icolos.utils.execute_external.gromacs import GromacsExecutor
+from pydantic import BaseModel
+from icolos.core.workflow_steps.step import _LE
+from icolos.core.workflow_steps.gromacs.base import StepGromacsBase
+
+_GE = GromacsEnum()
+_SGE = StepGromacsEnum()
+_SBE = StepBaseEnum
+
+
+class StepGMXSolvate(StepGromacsBase, BaseModel):
+    """
+    Fill the simulation box with solvent; executes gmx solvate
+    """
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        self._initialize_backend(executor=GromacsExecutor)
+        self._check_backend_availability()
+
+    def execute(self):
+        tmp_dir = self._make_tmpdir()
+        self._write_input_files(tmp_dir)
+        structure_file = self.data.generic.get_argument_by_extension(
+            _SGE.FIELD_KEY_STRUCTURE
+        )
+        arguments = self._parse_arguments(
+            flag_dict={
+                "-cp": structure_file,
+                "-p": self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TOPOL),
+                "-o": structure_file,
+            }
+        )
+        result = self._backend_executor.execute(
+            command=_GE.SOLVATE, arguments=arguments, location=tmp_dir
+        )
+        for line in result.stdout.split("\n"):
+            self._logger_blank.log(line, _LE.DEBUG)
+        self._logger.log(
+            f"Completed execution for {self.step_id} successfully.", _LE.INFO
+        )
+        self._parse_output(tmp_dir)
+        self._remove_temporary(tmp_dir)
diff --git
a/icolos/core/workflow_steps/gromacs/trajcat.py b/icolos/core/workflow_steps/gromacs/trajcat.py new file mode 100644 index 0000000..c2b5fc9 --- /dev/null +++ b/icolos/core/workflow_steps/gromacs/trajcat.py @@ -0,0 +1,54 @@ +from enum import Flag +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from icolos.utils.enums.program_parameters import GromacsEnum +from icolos.core.workflow_steps.gromacs.base import StepGromacsBase +from icolos.utils.execute_external.gromacs import GromacsExecutor +from pydantic import BaseModel +from icolos.core.workflow_steps.step import _LE +import os + +_GE = GromacsEnum() +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class StepGMXTrjcat(StepGromacsBase, BaseModel): + """ + Concatenates multiple trajectories, useful for subsequent rmsd/cluster calculations + """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=GromacsExecutor) + self._check_backend_availability() + + def execute(self): + + tmp_dir = self._make_tmpdir() + + # write the trajectories to the tmpdir, writing to separate file names, then glob the xtc files + + for idx, file in enumerate(self.data.generic.get_files_by_extension(ext="xtc")): + file.write(path=os.path.join(tmp_dir, f"traj_{idx}.xtc"), join=False) + + flag_dict = { + "-f": "*.xtc", + "-o": "trjcat_out.xtc", + "-cat": "", # need this to paste the trajectories back to back + } + + arguments = self._parse_arguments(flag_dict=flag_dict) + result = self._backend_executor.execute( + command=_GE.TRJCAT, arguments=arguments, location=tmp_dir, check=True + ) + for line in result.stdout.split("\n"): + self._logger_blank.log(line, _LE.DEBUG) + + rm_files = [ + f for f in os.listdir(tmp_dir) if f.endswith("xtc") and "trjcat" not in f + ] + for f in rm_files: + os.remove(os.path.join(tmp_dir, f)) + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/gromacs/trjconv.py b/icolos/core/workflow_steps/gromacs/trjconv.py new file mode 100644 index 0000000..bafa0ba --- /dev/null +++ b/icolos/core/workflow_steps/gromacs/trjconv.py @@ -0,0 +1,50 @@ +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from icolos.utils.enums.program_parameters import GromacsEnum +from icolos.core.workflow_steps.gromacs.base import StepGromacsBase +from icolos.utils.execute_external.gromacs import GromacsExecutor +from pydantic import BaseModel +from icolos.core.workflow_steps.step import _LE + +_GE = GromacsEnum() +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class StepGMXTrjconv(StepGromacsBase, BaseModel): + """ + Postprocessing step for gromacs trajectories + Mostly used for removing pbc, fitting trajectory etc. 
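+    The groups used for centring/output are supplied via the pipe_input entry in
+    settings.additional (an illustrative value would be "Protein System"), which
+    construct_pipe_arguments() turns into the stdin stream fed to gmx trjconv.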
+ """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=GromacsExecutor) + self._check_backend_availability() + + def execute(self): + + tmp_dir = self._make_tmpdir() + self._write_input_files(tmp_dir) + + xtc_file = self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_XTC) + flag_dict = { + "-s": self.data.generic.get_argument_by_extension(_SGE.FIELD_KEY_TPR), + "-f": xtc_file, + "-o": xtc_file, + } + + arguments = self._parse_arguments(flag_dict=flag_dict) + result = self._backend_executor.execute( + command=_GE.TRJCONV, + arguments=arguments, + location=tmp_dir, + check=True, + pipe_input=self.construct_pipe_arguments( + tmp_dir, self.settings.additional[_SBE.PIPE_INPUT] + ), + ) + for line in result.stdout.split("\n"): + self._logger_blank.log(line, _LE.DEBUG) + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/io/__init__.py b/icolos/core/workflow_steps/io/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/io/base.py b/icolos/core/workflow_steps/io/base.py new file mode 100644 index 0000000..58e16dd --- /dev/null +++ b/icolos/core/workflow_steps/io/base.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.step_enums import StepBaseEnum + +_SBE = StepBaseEnum + + +class StepIOBase(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) diff --git a/icolos/core/workflow_steps/io/data_manipulation.py b/icolos/core/workflow_steps/io/data_manipulation.py new file mode 100644 index 0000000..01b3e69 --- /dev/null +++ b/icolos/core/workflow_steps/io/data_manipulation.py @@ -0,0 +1,248 @@ +from typing import List, Union +from pydantic import BaseModel + +from icolos.core.containers.compound import unroll_conformers +from icolos.core.step_utils.structcat_util import StructcatUtil +from icolos.core.step_utils.structconvert import StructConvert +from icolos.utils.enums.program_parameters import ( + OpenBabelEnum, + SchrodingerExecutablesEnum, +) +from icolos.utils.enums.step_enums import ( + StepDataManipulationEnum, + StepBaseEnum, + StepFilterEnum, +) +from icolos.core.workflow_steps.io.base import StepIOBase +import os +from icolos.core.workflow_steps.step import _LE +import numpy as np + +_SBE = StepBaseEnum +_SDM = StepDataManipulationEnum() +_SEE = SchrodingerExecutablesEnum() +_OE = OpenBabelEnum() +_SFE = StepFilterEnum() + + +class StepDataManipulation(StepIOBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + # extend parameters + if _SDM.ACTION not in self.settings.additional.keys(): + self.settings.additional[ + _SDM.ACTION + ] = _SDM.ACTION_ATTACH_CONFORMERS_AS_EXTRA + self._logger.log( + f"Action not specified, defaulting to {_SDM.ACTION_ATTACH_CONFORMERS_AS_EXTRA}.", + _LE.WARNING, + ) + + def _attach_conformers_as_extra(self): + # load data to match from previous step (note: no other input supported here, to avoid redudancy + # with standard input preparation) + match_compounds = ( + self.get_workflow_object() + .find_step_by_step_id(self.settings.additional[_SDM.MATCH_SOURCE]) + .clone_compounds() + ) + + # unroll for convenience, attach matches to input conformers as extra data + match_conformers = unroll_conformers(match_compounds) + for comp in self.get_compounds(): + for enum in comp: + for conf in enum: + list_matched = [ + c + for c in match_conformers + if conf.get_index_string() == 
c.get_index_string() + ] + conf.add_extra_data(key=_SDM.KEY_MATCHED, data=list_matched) + self._logger.log( + f"Added {len(list_matched)} conformers as extra data to conformer {conf.get_index_string()}.", + _LE.DEBUG, + ) + + def _convert_mae_to_pdb(self): + converter = StructConvert(prefix_execution=_SEE.SCHRODINGER_MODULE) + tmp_dir = self._make_tmpdir() + + # find the mae files from the input step and convert to pdb + for file in self.data.generic.get_files_by_extension("mae"): + file.write(tmp_dir) + output_file = file.get_file_name().split(".")[0] + ".pdb" + converter.mae2pdb( + os.path.join(tmp_dir, file.get_file_name()), + os.path.join(tmp_dir, output_file), + ) + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) + + def _assemble_complexes(self): + concatenator = StructcatUtil( + prefix_execution=_SEE.SCHRODINGER_MODULE, backend=_OE.OBABEL + ) + assert os.path.isfile(self.settings.additional[_SDM.RECEPTOR]) + # create a tmpdir to work in + tmp_dir = self._make_tmpdir() + # get compounds from previous step + conformers = self._unroll_compounds(self.get_compounds(), level="conformers") + for conf in conformers: + path = os.path.join(tmp_dir, f"{conf.get_index_string()}.sdf") + mol = conf.get_molecule() + conf.write(path) + concatenator.concatenate( + input_files=[ + self.settings.additional[_SDM.RECEPTOR], + path, + ], + output_file=os.path.join(tmp_dir, f"{conf.get_index_string()}.pdb"), + ) + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) + + def _filter_compounds(self): + """modifies set of input compounds according to the specification provided in the config block""" + # TODO: support ranking structures based on generic data + + top_n = self.settings.additional[_SFE.RETURN_N] + reverse = self.settings.additional[_SFE.HIGHEST_IS_BEST] + criteria = ( + self.settings.additional[_SFE.CRITERIA] + if _SFE.CRITERIA in self.settings.additional.keys() + else None + ) + aggregation = ( + self.settings.additional[_SFE.AGGREGATION] + if _SFE.AGGREGATION in self.settings.additional.keys() + else "sum" + ) + + top_conformer_list = [] + for compound in self.data.compounds: + # filter by enumeration first - return a list of the top scoring conformers for that enumeration + # this is the normal running mode, as opposed to sorting by compound, regardless of the enumeration it came from + for enumeration in compound.get_enumerations(): + enumeration.sort_conformers( + by_tag=criteria, reverse=reverse, aggregation=aggregation + ) + top_confs = enumeration.get_conformers()[:top_n] + enumeration.clear_conformers() + enumeration.add_conformers(top_confs) + # replace that enumeration's conformers with the sorted list. + # if filtering at conformer level i.e. 
regardless of enumeration + if self.settings.additional[_SFE.FILTER_LEVEL] == _SFE.COMPOUNDS: + for conf in top_confs: + top_conformer_list.append(conf) + if self.settings.additional[_SFE.FILTER_LEVEL] == _SFE.COMPOUNDS: + # sort the top conformers from each enumeration and attach the top n conformers to their respective enumeration, get rid of the rest + # sorted_top_confs = sorted(top_conformer_list, + # key=lambda x: x.get_molecule().GetProp(self.settings.additional[_SFE.CRITERIA]), + # reverse=reverse)[:top_n] + # sort conformers + sorted_top_confs = self._sort_conformers( + conformers=top_conformer_list, + by_tag=criteria, + reverse=reverse, + aggregation=aggregation, + ) + for compound in self.data.compounds: + for enum in compound.get_enumerations(): + enum.clear_conformers() + for conf in sorted_top_confs: + enum = conf.get_enumeration_object() + enum.add_conformer(conf) + + def _sort_conformers( + self, + conformers, + by_tag: Union[str, List[str]], + reverse: bool = True, + aggregation="sum", + ): + if isinstance(by_tag, list) and len(by_tag) == 1: + by_tag = by_tag[0] + + if isinstance(by_tag, str): + # sorting according to a single tag + conformers = sorted( + conformers, + key=lambda x: float(x.get_molecule().GetProp(by_tag)), + reverse=reverse, + ) + return conformers + # self._conformers = conformers + # self.reset_conformer_ids() + elif isinstance(by_tag, list): + # need to normalise the values, calculate max and min of each tag for that series of conformers provided + # this would allow us to compare across a series, i.e. scoring and ranking the output of all conformers in an enumeration from Glide + def normalise_tag(value, tag): + all_tag_values = [ + float(conf.get_molecule().GetProp(tag)) for conf in conformers + ] + if len(all_tag_values) == 1: + return value + else: + + max_tag = np.max(all_tag_values) + min_tag = np.min(all_tag_values) + return (float(value) - min_tag) / (max_tag - min_tag) + + # if we specify multiple tags, aggregate according the the provided aggregation function + if aggregation == "sum": + # sort by the sum of the normalised tags, + conformers = sorted( + conformers, + key=lambda x: np.sum( + [ + float(normalise_tag(x.get_molecule().GetProp(i), i)) + for i in by_tag + ] + ), + reverse=reverse, + ) + return conformers + elif aggregation == "product": + conformers = sorted( + conformers, + key=lambda x: np.product( + [ + float(normalise_tag(x.get_molecule().GetProp(i), i)) + for i in by_tag + ] + ), + reverse=reverse, + ) + return conformers + else: + raise AttributeError( + "Only sum or product aggregation modes are currently supported - ABORT" + ) + + def execute(self): + if ( + self.settings.additional[_SDM.ACTION] + == _SDM.ACTION_ATTACH_CONFORMERS_AS_EXTRA + ): + self._attach_conformers_as_extra() + elif self.settings.additional[_SDM.ACTION] == _SDM.ACTION_NO_ACTION: + n_comp, n_enum, n_conf = self.get_compound_stats() + self._logger.log( + f'Data manipulation step type "no_action" for {n_comp} compounds with {n_enum} enumerations with {n_conf} conformers completed.', + _LE.INFO, + ) + elif self.settings.additional[_SDM.ACTION] == _SDM.CONVERT_MAE_TO_PDB: + self._convert_mae_to_pdb() + elif self.settings.additional[_SDM.ACTION] == _SDM.ASSEMBLE_COMPLEXES: + # take pose conformers (sd format) and concatenate with pdb file + self._assemble_complexes() + elif self.settings.additional[_SDM.ACTION] == _SDM.COLLECT_ITERATOR_RESULTS: + # average the results coming from all iterations of the step + raise NotImplementedError + elif 
self.settings.additional[_SDM.ACTION] == _SDM.FILTER: + self._filter_compounds() + else: + raise ValueError( + f'Action "{self.settings.additional[_SDM.ACTION]}" not supported.' + ) diff --git a/icolos/core/workflow_steps/io/embedder.py b/icolos/core/workflow_steps/io/embedder.py new file mode 100644 index 0000000..884c522 --- /dev/null +++ b/icolos/core/workflow_steps/io/embedder.py @@ -0,0 +1,136 @@ +from copy import deepcopy + +from pydantic import BaseModel +from rdkit import Chem, RDLogger +from rdkit.Chem import AllChem + +from icolos.core.containers.compound import Conformer +from icolos.utils.general.icolos_exceptions import StepFailed +from icolos.utils.enums.step_enums import StepEmbeddingEnum +from icolos.core.workflow_steps.io.base import StepIOBase + +from icolos.core.workflow_steps.step import _LE +from icolos.utils.general.convenience_functions import * +from icolos.utils.smiles import to_mol + +_SEE = StepEmbeddingEnum() + + +class StepEmbedding(StepIOBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + # extend parameters with defaults + if _SEE.EMBED_AS not in self.settings.additional.keys(): + self.settings.additional[_SEE.EMBED_AS] = _SEE.EMBED_AS_ENUMERATIONS + self._logger.log( + f'No embedding level specified, defaulting to "{_SEE.EMBED_AS_ENUMERATIONS}".', + _LE.INFO, + ) + + def _smile_to_molecule(self, smile: str) -> Chem.Mol: + mol = to_mol(smile) + if mol is None: + self._logger.log( + f"The smile {smile} could not be transformed into a molecule and will be skipped.", + _LE.WARNING, + ) + return mol + + def _embed_with_RDKit(self, smile: str, parameters: dict) -> Chem.Mol: + molecule = self._smile_to_molecule(smile) + + # deactivate logger to suppress "missing Hs messages" + RDLogger.DisableLog("rdApp.*") + embed_code = AllChem.EmbedMolecule( + molecule, randomSeed=42, useRandomCoords=True + ) + + status = 0 + if embed_code != -1: + status = AllChem.UFFOptimizeMolecule(molecule, maxIters=600) + if status == 1: + self._logger.log( + f"The 3D coordinate generation of molecule {smile} did not converge in time.", + _LE.WARNING, + ) + else: + self._logger.log( + f"Could not embed molecule {smile} - no 3D coordinates have been generated.", + _LE.WARNING, + ) + RDLogger.EnableLog("rdApp.*") + + # add hydrogens to the molecule (if specified) + if nested_get(parameters, [_SEE.RDKIT_PROTONATE], default=True): + molecule = Chem.AddHs(molecule, addCoords=True) + + if embed_code != -1 and status == 0: + return molecule + + def _get_embedding_method(self, parameters: dict) -> str: + method = nested_get(parameters, [_SEE.METHOD], default=None) + if method is None: + error = "Embedding method not set." 
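+            # no embedding method was supplied in the step parameters: log and abort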
+            self._logger.log(error, _LE.ERROR)
+            raise StepFailed(error)
+        return method.upper()
+
+    def _embed_molecule(self, smile: str, parameters: dict) -> Chem.Mol:
+        method = self._get_embedding_method(parameters)
+        if method == _SEE.METHOD_RDKIT:
+            return self._embed_with_RDKit(smile, parameters)
+        else:
+            self._logger.log(
+                f"Specified embedding method {method} not available.", _LE.ERROR
+            )
+
+    def execute(self):
+        # TODO: REFACTOR
+        parameters = deepcopy(self.settings.arguments.parameters)
+        embed_as = self.settings.additional[_SEE.EMBED_AS]
+        for compound in self.get_compounds():
+            if embed_as == _SEE.EMBED_AS_ENUMERATIONS:
+                for enumeration in compound.get_enumerations():
+                    enumeration.clear_molecule()
+                    enumeration.clear_conformers()
+                    molecule = self._embed_molecule(
+                        smile=enumeration.get_smile(), parameters=parameters
+                    )
+                    enumeration.set_molecule(molecule)
+                number_successful = len(
+                    [
+                        True
+                        for enum in compound.get_enumerations()
+                        if enum.get_molecule() is not None
+                    ]
+                )
+                self._logger.log(
+                    f"Embedding for compound {compound.get_index_string()} (name: {compound.get_name()}) completed ({number_successful} of {len(compound)} enumerations successful).",
+                    _LE.INFO,
+                )
+            elif embed_as == _SEE.EMBED_AS_CONFORMERS:
+                for enumeration in compound.get_enumerations():
+                    enumeration.clear_conformers()
+                    molecule = self._embed_molecule(
+                        smile=enumeration.get_smile(), parameters=parameters
+                    )
+                    conformer = Conformer(
+                        conformer=molecule, enumeration_object=enumeration
+                    )
+                    enumeration.add_conformer(conformer, auto_update=True)
+                number_successful = len(
+                    [
+                        True
+                        for enum in compound.get_enumerations()
+                        if enum[0].get_molecule() is not None
+                    ]
+                )
+                self._logger.log(
+                    f"Embedding for compound {compound.get_index_string()} (name: {compound.get_name()}) completed ({number_successful} of {len(compound)} enumerations successful).",
+                    _LE.INFO,
+                )
+            else:
+                raise ValueError(
+                    f'Value "{embed_as}" for parameter "embed_as" not supported.'
+                )
diff --git a/icolos/core/workflow_steps/io/initialize_compound.py b/icolos/core/workflow_steps/io/initialize_compound.py
new file mode 100644
index 0000000..36644cf
--- /dev/null
+++ b/icolos/core/workflow_steps/io/initialize_compound.py
@@ -0,0 +1,20 @@
+from pydantic import BaseModel
+
+from icolos.utils.general.icolos_exceptions import StepFailed
+from icolos.core.workflow_steps.io.base import StepIOBase
+from icolos.core.workflow_steps.step import _LE
+
+
+class StepInitializeCompound(StepIOBase, BaseModel):
+    def __init__(self, **data):
+        super().__init__(**data)
+
+    def execute(self):
+        if len(self.data.compounds) == 0:
+            raise StepFailed(
+                "Compound initialization step failed - no Compound objects generated."
+            )
+        self._logger.log(
+            f"Step {self.get_step_id()} initialized {len(self.get_compounds())} compounds.",
+            _LE.INFO,
+        )
diff --git a/icolos/core/workflow_steps/pmx/__init__.py b/icolos/core/workflow_steps/pmx/__init__.py
new file mode 100644
index 0000000..1e5e115
--- /dev/null
+++ b/icolos/core/workflow_steps/pmx/__init__.py
@@ -0,0 +1,14 @@
+from icolos.core.workflow_steps.pmx.atomMapping import StepPMXatomMapping
+from icolos.core.workflow_steps.pmx.doublebox import StepPMXdoublebox
+from icolos.core.workflow_steps.pmx.genlib import StepPMXgenlib
+from icolos.core.workflow_steps.pmx.gentop import StepPMXgentop
+from icolos.core.workflow_steps.pmx.ligandHybrid import StepPMXligandHybrid
+from icolos.core.workflow_steps.pmx.mutate import StepPMXmutate
+from icolos.core.workflow_steps.pmx.abfe import StepPMXabfe
+from icolos.core.workflow_steps.pmx.box_water_ions import StepPMXBoxWaterIons
+from icolos.core.workflow_steps.pmx.prepare_simulations import StepPMXPrepareSimulations
+from icolos.core.workflow_steps.pmx.prepare_transitions import StepPMXPrepareTransitions
+from icolos.core.workflow_steps.pmx.run_analysis import StepPMXRunAnalysis
+from icolos.core.workflow_steps.pmx.setup_workpath import StepPMXSetup
+from icolos.core.workflow_steps.pmx.run_simulations import StepPMXRunSimulations
+from icolos.core.workflow_steps.pmx.assemble_systems import StepPMXAssembleSystems
diff --git a/icolos/core/workflow_steps/pmx/abfe.py b/icolos/core/workflow_steps/pmx/abfe.py
new file mode 100644
index 0000000..1c78c9f
--- /dev/null
+++ b/icolos/core/workflow_steps/pmx/abfe.py
@@ -0,0 +1,149 @@
+from icolos.core.workflow_steps.pmx.base import StepPMXBase
+from pydantic import BaseModel
+import os
+from icolos.utils.enums.step_enums import StepGromacsEnum
+from icolos.utils.execute_external.gromacs import GromacsExecutor
+from icolos.utils.execute_external.pmx import PMXExecutor
+from icolos.utils.enums.program_parameters import (
+    GromacsEnum,
+    PMXEnum,
+    PMXAtomMappingEnum,
+)
+
+_PE = PMXEnum()
+_PAE = PMXAtomMappingEnum()
+_GE = GromacsEnum()
+_SGE = StepGromacsEnum()
+
+
+class StepPMXabfe(StepPMXBase, BaseModel):
+    """Setup files for an ABFE calculation."""
+
+    _gromacs_executor: GromacsExecutor = GromacsExecutor()
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        self._initialize_backend(PMXExecutor)
+        self._check_backend_availability()
+        self._gromacs_executor = GromacsExecutor(prefix_execution=_SGE.GROMACS_LOAD)
+
+    def _separate_protein_ligand(self):
+        # separate out protein and ligand lines from the written complex.pdb
+        with open(os.path.join(self.work_dir, "complex.pdb"), "r") as f:
+            lines = f.readlines()
+        protein_lines = []
+        ligand_lines = []
+        # TODO: tighten up the logic for identifying the ligand here
+        for line in lines:
+            if "ATOM" in line:
+                protein_lines.append(line)
+            elif "HETATM" in line and "HOH" not in line:
+                ligand_lines.append(line)
+
+        with open(os.path.join(self.work_dir, "protein.pdb"), "w") as f:
+            f.writelines(protein_lines)
+
+        with open(os.path.join(self.work_dir, "MOL.pdb"), "w") as f:
+            f.writelines(ligand_lines)
+
+        os.remove(os.path.join(self.work_dir, "complex.pdb"))
+
+    def execute(self):
+        """
+        Required inputs:
+            protein.top, protein.gro
+            ligand.itp, ligand.gro
+
+        Execution:
+        - separate protein and ligand from the complex
+        - run pdb2gmx on the protein -> generates protein.top, protein.gro
+        - run acpype on the ligand -> generates ligand.itp, ligand.gro
+        - run pmx abfe to set up the system, done!
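+
+        The resulting call is roughly equivalent to (file names as produced by the
+        steps above; defaults as listed in the help text at the end of this module):
+
+            pmx abfe -pt topol.top -lt MOL.itp -pc protein.gro -lc MOL_GMX.gro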
+ """ + # use the same single dir setup as for the rest of the pmx pipeline + + assert self.work_dir is not None and os.path.isdir(self.work_dir) + + complex_file = self.data.generic.get_argument_by_extension( + "pdb", rtn_file_object=True + ) + complex_file.write(os.path.join(self.work_dir, "complex.pdb"), join=False) + + self._separate_protein_ligand() + + # parametrise the ligand, generate the itp files, top and gro files for the ligand + self._parametrisation_pipeline( + self.work_dir, include_gro=True, include_top=True + ) + + # parametrise protein + self._parametrise_protein(protein="protein.pdb", path="", output="protein.gro") + + # run abfe + + args = { + "-pt": "topol.top", + "-lt": "MOL.itp", + "-pc": "protein.gro", + "-lc": "MOL_GMX.gro", + } + self._backend_executor.execute( + command=_PE.ABFE, + arguments=self.get_arguments(args), + location=self.work_dir, + check=True, + ) + + +help_string = """ +pmx abfe -h +usage: pmx [-h] [-pt protop] [-lt ligtop] [-pc procrd] [-lc ligcrd] [--build] + [--doublebox] [--longest_axis] [--keep_intra] [--lig_ids ] + [--pro_ids ] [--restr_switch_on] [--seed int] + +This scripts helps to setup an absolute binding free energy calculation. As a +minimal input, you need to provide a structure and topology file for both the +protein (or host) and ligand (or guest) molecule. The topology is setup so to +contain restraints as defined by Boresch et al. (2003) J Phys Chem B 107(35); +these include one distance, two angles, and three dihedrals between ligand and +protein. You can either provide explicitly the atoms to be included in the +restraints, or let the script choose them automatically. + +optional arguments: + -h, --help show this help message and exit + -pt protop Input topology file for the protein. Default is + "protein.top". + -lt ligtop Input topology file for the ligand. It is expected that + all params needed for the ligand are explicitly defined + in this file. Default is "ligand.itp". + -pc procrd Input structure file in PDB or GRO format for the + protein. Default is "protein.gro". + -lc ligcrd Input structure file in PDB or GRO format for the ligand. + Default is "ligand.gro". + --build Whether to build the system (editconf, solvate, genion) + with a standard setup once the input files (top, gro) are + ready. + --doublebox Whether to use the double-system single-box setup. This + is useful for charged ligands. Default is False. + --longest_axis Whether to just place structures along the longest axis, + rather then minimising the volume. This option is + relevant only when using --doublebox. Default is False. + --keep_intra Whether to keep the LJ intramolecular interactions when + the ligand is decoupled. This option is relevant only + when using --doublebox. Default is False. + --lig_ids Three atom indices. If provided, these will be used for + the protein-ligand restraints. Otherwise they are chosen + automatically. + --pro_ids Three atom indices. If provided, these will be used for + the protein-ligand restraints. Otherwise they are chosen + automatically. + --restr_switch_on Whether to switch the restraints on or off, where "on" + means no restraints in stateA, and "off" means no + restraints in state B. Default is True (switch on). + --seed int Random seed to use when picking atoms for the restraints. + The automated restraints selection is stochastic, so if + you want to have a reproducible behaviour, provide a + random seed. 
+""" diff --git a/icolos/core/workflow_steps/pmx/assemble_systems.py b/icolos/core/workflow_steps/pmx/assemble_systems.py new file mode 100644 index 0000000..f2e2a64 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/assemble_systems.py @@ -0,0 +1,53 @@ +from typing import Dict, List +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel +import os +from icolos.utils.enums.program_parameters import PMXAtomMappingEnum, PMXEnum +from icolos.utils.execute_external.pmx import PMXExecutor +from icolos.utils.general.parallelization import SubtaskContainer + +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class StepPMXAssembleSystems(StepPMXBase, BaseModel): + """ + Executes the assemble_systems.py script, edges are parallelized over available cores + + """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=PMXExecutor) + + def execute(self): + assert self.work_dir is not None and os.path.isdir(self.work_dir) + + # get edges from the perturbation map attached to the step + edges = self.get_edges() + + # enforce one edge per task list (results in multiple batches for large maps) + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(edges) + self._execute_pmx_step_parallel( + run_func=self._execute_command, step_id="pmx_setup" + ) + + def _execute_command(self, edges: List, q: Dict): + + args = { + "-edges": '"' + " ".join([e.get_edge_id() for e in edges]) + '"', + "-ligand_path": os.path.join(self.work_dir, _PAE.LIGAND_DIR), + "-workPath": self.work_dir, + } + result = self._backend_executor.execute( + command=_PE.ASSEMBLE_SYSTEMS, + arguments=self.get_arguments(defaults=args), + check=True, + location=self.work_dir, + ) + q[edges[0].get_edge_id()] = result.returncode diff --git a/icolos/core/workflow_steps/pmx/atomMapping.py b/icolos/core/workflow_steps/pmx/atomMapping.py new file mode 100644 index 0000000..7bf1f76 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/atomMapping.py @@ -0,0 +1,86 @@ +from typing import Dict, List +from icolos.core.containers.perturbation_map import Edge +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel +from icolos.utils.execute_external.pmx import PMXExecutor +import os +from icolos.utils.enums.program_parameters import PMXEnum, PMXAtomMappingEnum +from icolos.utils.general.parallelization import SubtaskContainer + +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class StepPMXatomMapping(StepPMXBase, BaseModel): + """Ligand alchemy: map atoms for morphing.""" + + def __init__(self, **data): + super().__init__(**data) + self._initialize_backend(executor=PMXExecutor) + + def _prepare_arguments(self, args, output_dir): + # prepare the final set of arguments as a list + prepared_args = [] + default_args = { + "-o1": f"{output_dir}/pairs1.dat", + "-o2": f"{output_dir}/pairs2.dat", + "-opdb1": f"{output_dir}/out_pdb1.pdb", + "-opdb2": f"{output_dir}/out_pdb2.pdb", + "-opdbm1": f"{output_dir}/out_pdbm1.pdb", + "-opdbm2": f"{output_dir}/out_pdbm2.pdb", + "-score": f"{output_dir}/score.dat", + "-log": f"{output_dir}/mapping.log", + } + for key, value in args.items(): + default_args[key] = value + + for key, value in default_args.items(): + prepared_args.append(key), + prepared_args.append(value) + return prepared_args + + def _execute_command(self, edges: List[Edge], q: Dict): + assert isinstance(edges, 
list) + edge = edges[0] + lig1 = edge.get_source_node_name() + lig2 = edge.get_destination_node_name() + # write them to the right dir as a pdb from the outset + arguments = { + "-i1": os.path.join( + self.work_dir, + _PAE.LIGAND_DIR, + lig1, + "MOL.pdb", + ), + "-i2": os.path.join( + self.work_dir, + _PAE.LIGAND_DIR, + lig2, + "MOL.pdb", + ), + } + output_dir = os.path.join(self.work_dir, edge.get_edge_id(), _PE.HYBRID_STR_TOP) + arguments = self._prepare_arguments(args=arguments, output_dir=output_dir) + + result = self._backend_executor.execute( + command=_PE.ATOMMAPPING, + arguments=arguments, + check=True, + location=self.work_dir, + ) + q[edge.get_edge_id()] = result.returncode + + def execute(self): + # check the workflow has been configured correctly to use a shared work_dir + assert self.work_dir is not None and os.path.isdir(self.work_dir) + + edges = self.get_edges() + # enforce single edge per job queue + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(edges) + self._execute_pmx_step_parallel( + run_func=self._execute_command, step_id="atomMapping" + ) diff --git a/icolos/core/workflow_steps/pmx/base.py b/icolos/core/workflow_steps/pmx/base.py new file mode 100644 index 0000000..7fe0f0c --- /dev/null +++ b/icolos/core/workflow_steps/pmx/base.py @@ -0,0 +1,255 @@ +from subprocess import CompletedProcess +from pydantic import BaseModel +from icolos.core.containers.perturbation_map import PerturbationMap +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.program_parameters import GromacsEnum +from icolos.utils.enums.step_enums import StepGromacsEnum +from icolos.utils.execute_external.execute import Executor +from icolos.utils.execute_external.pmx import PMXExecutor +import os +from icolos.utils.general.parallelization import Parallelizer +from icolos.core.workflow_steps.step import _LE +import shutil + +_GE = GromacsEnum() +_SGE = StepGromacsEnum() + + +class StepPMXBase(StepBase, BaseModel): + + _antechamber_executor: Executor = None + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=PMXExecutor) + self._check_backend_availability() + self._antechamber_executor = Executor(prefix_execution=_SGE.AMBERTOOLS_LOAD) + + def _parametrise_protein( + self, + protein: str = "protein.pdb", + path: str = "input/protein", + output="protein.pdb", + ): + # run pdb2gmx on the protein + pdb2gmx_args = [ + "-f", + os.path.join(self.work_dir, path, protein), + "-ignh", + "-water", + self.settings.additional["water"], + "-ff", + self.settings.additional["forcefield"], + "-o", + os.path.join(self.work_dir, path, output), + ] + self._gromacs_executor.execute( + command=_GE.PDB2GMX, + arguments=pdb2gmx_args, + check=True, + location=os.path.join(self.work_dir, path), + ) + + def _clean_pdb_structure(self, tmp_dir: str) -> None: + files = [file for file in os.listdir(tmp_dir) if file.endswith("pdb")] + for file in files: + cleaned_lines = [] + with open(os.path.join(tmp_dir, file), "r") as f: + lines = f.readlines() + for line in lines: + if "ATOM" in line or "HETATM" in line: + cleaned_lines.append(line) + with open(os.path.join(tmp_dir, file), "w") as f: + f.writelines(cleaned_lines) + + def _parametrisation_pipeline(self, tmp_dir, include_top=False, include_gro=False): + # main pipeline for producing GAFF parameters for a ligand + arguments_antechamber = [ + "-i", + "MOL.pdb", + "-o", + 
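+ # sketch of the equivalent antechamber CLI call assembled by this list + # (gas-phase charge method; flag values as given above and below): + # antechamber -i MOL.pdb -o MOL.mol2 -fi pdb -fo mol2 -c gas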
"MOL.mol2", + "-fi", + "pdb", + "-fo", + "mol2", + "-c", + "gas", + ] + self._logger.log( + f"Running antechamber on structure {tmp_dir.split('/')[-1]}", _LE.DEBUG + ) + self._antechamber_executor.execute( + command=_GE.ANTECHAMBER, + arguments=arguments_antechamber, + check=True, + location=tmp_dir, + ) + + arguments_acpype = [ + os.path.join(_GE.ACPYPE_PATH, _GE.ACPYPE_BINARY), + "-di", + "MOL.mol2", + "-c", + "gas", + ] + self._antechamber_executor.execute( + command=_GE.PYTHON, arguments=arguments_acpype, location=tmp_dir, check=True + ) + # search the output dir for the itp file + acpype_dir = [p for p in os.listdir(tmp_dir) if p.endswith(".acpype")][0] + itp_file = [ + f + for f in os.listdir(os.path.join(tmp_dir, acpype_dir)) + if f.endswith("GMX.itp") + ][0] + shutil.copyfile( + os.path.join(tmp_dir, acpype_dir, itp_file), + # standardized name must be enforced here to make argument + # parsing easier in subsequent pmx steps + os.path.join(tmp_dir, "MOL.itp"), + ) + # for abfe calculations we need the ligand_GMX.top + .gro files as well + if include_top: + top_file = [ + f + for f in os.listdir(os.path.join(tmp_dir, acpype_dir)) + if f.endswith("GMX.top") + ][0] + shutil.copyfile( + os.path.join(tmp_dir, acpype_dir, top_file), + os.path.join(tmp_dir, top_file), + ) + if include_gro: + gro_file = [ + f + for f in os.listdir(os.path.join(tmp_dir, acpype_dir)) + if f.endswith("GMX.gro") + ][0] + shutil.copyfile( + os.path.join(tmp_dir, acpype_dir, gro_file), + os.path.join(tmp_dir, gro_file), + ) + + def _execute_pmx_step_parallel(self, run_func, step_id: str): + """ + Instantiates Icolos's parallelizer object, + runs the step's execute method, + checks the reutrn codes i.e. will error if an edge fails + """ + parallelizer = Parallelizer(func=run_func, collect_rtn_codes=True) + n = 1 + while self._subtask_container.done() is False: + + next_batch = self._get_sublists( + get_first_n_lists=self._get_number_cores() + ) # return n lists of length max_sublist_length + _ = [sub.increment_tries() for element in next_batch for sub in element] + _ = [sub.set_status_failed() for element in next_batch for sub in element] + edges = self._prepare_edges(next_batch) + # to avoid simultaneous processes logging to the same file, pass the + self._logger.log( + f"Executing {step_id} for batch {n}, containing {len(edges)} * {len(edges[0])} edges", + _LE.INFO, + ) + + rtn_codes = parallelizer.execute_parallel( + edges=edges, + ) + assert len(rtn_codes) == len(next_batch) + for idx, sublist in enumerate(next_batch): + for task in sublist: # one edge per sublist + if rtn_codes[idx] == 0: + task.set_status_success() + else: + task.set_status_failed() + + n += 1 + + def get_arguments(self, defaults: dict = None) -> list: + """ + Construct pmx-specific arguments from the step defaults, + overridden by arguments specified in the config file + """ + arguments = [] + + # add flags + for flag in self.settings.arguments.flags: + arguments.append(flag) + + # flatten the dictionary into a list for command-line execution + for key in self.settings.arguments.parameters.keys(): + arguments.append(key) + arguments.append(self.settings.arguments.parameters[key]) + + # add defaults, if not already present + if defaults is not None: + for key, value in defaults.items(): + if key not in arguments: + arguments.append(key) + arguments.append(value) + return arguments + + def get_edges(self): + """ + Inspect the map object passed to the step and extract the edge info + """ + + return 
self.get_workflow_object().workflow_data.perturbation_map.edges + + def get_nodes(self): + """ + Return the nodes attached to the perturbation map + """ + return self.get_workflow_object().workflow_data.perturbation_map.nodes + + def _get_line_idx(self, data: list, id_str: str) -> int: + # utility to extract the index of the line containing a specific id string + line = [e for e in data if id_str in e] + assert len(line) == 1 + line = line[0] + return data.index(line) + + def _construct_perturbation_map(self, work_dir: str, replicas: int): + # construct the perturbation map and load in the log file + log_file = self.data.generic.get_argument_by_extension( + "log", rtn_file_object=True + ) + log_file.write(work_dir) + perturbation_map = PerturbationMap( + compounds=self.data.compounds, + protein=self.data.generic.get_argument_by_extension( + "pdb", rtn_file_object=True + ), + replicas=replicas, + ) + perturbation_map.parse_map_file( + os.path.join(self.work_dir, log_file.get_file_name()) + ) + + self._logger.log( + f"Initialised perturbation map with {len(perturbation_map.get_nodes())} nodes and {len(perturbation_map.get_edges())} edges", + _LE.INFO, + ) + self.get_workflow_object().set_perturbation_map(perturbation_map) + + def _prepare_edges(self, batch): + edges = [] + + for task in batch: + task_edges = [] + for element in task: # for now, only a single element + task_edges.append(element.data) + edges.append(task_edges) + return edges + + def _log_result(self, result: CompletedProcess): + for line in result.stderr.split("\n"): + self._logger_blank.log(line, _LE.DEBUG) diff --git a/icolos/core/workflow_steps/pmx/box_water_ions.py b/icolos/core/workflow_steps/pmx/box_water_ions.py new file mode 100644 index 0000000..2be2023 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/box_water_ions.py @@ -0,0 +1,58 @@ +from typing import Dict, List +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel +import os +from icolos.utils.enums.program_parameters import PMXAtomMappingEnum, PMXEnum +from icolos.core.workflow_steps.step import _LE +from icolos.utils.execute_external.pmx import PMXExecutor +from icolos.utils.general.parallelization import SubtaskContainer + +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class StepPMXBoxWaterIons(StepPMXBase, BaseModel): + """ + Takes the prepared structure files and prepares the system; + runs editconf, solvate, genion and grompp for each system + to be simulated + """ + + # Note all paths are relative to the workdir + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=PMXExecutor) + + def execute(self): + # run the wrapper script in pmx to prepare the systems + + edges = self.get_edges() + + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(edges) + self._execute_pmx_step_parallel( + run_func=self._execute_command, step_id="BoxWaterIons" + ) + + def _execute_command(self, edges: List, q: Dict): + + arguments = { + "-edges": '"' + " ".join([e.get_edge_id() for e in edges]) + '"', + "-ligandPath": os.path.join(self.work_dir, _PAE.LIGAND_DIR), + "-workPath": self.work_dir, + } + + result = self._backend_executor.execute( + command=_PE.BOX_WATER_IONS,
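+ # note: get_arguments() (StepPMXBase) appends these defaults only if the + # key is absent from the user config, so a hypothetical config entry such + # as {"-workPath": "/scratch/run1"} would take precedence over the default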
arguments=self.get_arguments(defaults=arguments), + check=True, + location=self.work_dir, + ) + + self._logger.log("End of BoxWaterIons output", _LE.DEBUG) + # collect returncodes from subprocess + q[edges[0].get_edge_id()] = result.returncode diff --git a/icolos/core/workflow_steps/pmx/doublebox.py b/icolos/core/workflow_steps/pmx/doublebox.py new file mode 100644 index 0000000..9653cc4 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/doublebox.py @@ -0,0 +1,33 @@ +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel + + +class StepPMXdoublebox(StepPMXBase, BaseModel): + """Place two input structures into a single box.""" + + def __init__(self, **data): + super().__init__(**data) + + def execute(self): + pass + + +help_string = """ +pmx doublebox -h +usage: pmx [-h] -f1 -f2 [-o] [-r] [-d] [--longest_axis] + +Places two structures into a single box. The box is a rectangular cuboid in +which the two structures are placed in such a way as to minimise the box +volume. You can use this script to help in the setup of a calculation using +the single-box double-system approach. + +optional arguments: + -h, --help show this help message and exit + -f1 First structure in PDB or GRO format. + -f2 Second structure in PDB or GRO format. + -o Name of output file. Default is "doublebox.pdb". + -r Distance between the two structures (nm). Default is 2.5 nm. + -d Distance to the box wall (nm). Default is 1.5 nm. + --longest_axis Whether to just place structures along the longest axis, + rather then minimising the volume. Default is False. +""" diff --git a/icolos/core/workflow_steps/pmx/genlib.py b/icolos/core/workflow_steps/pmx/genlib.py new file mode 100644 index 0000000..0646297 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/genlib.py @@ -0,0 +1,68 @@ +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel + + +class StepPMXgenlib(StepPMXBase, BaseModel): + """Generate pmx ff library.""" + + def __init__(self, **data): + super().__init__(**data) + + def execute(self): + pass + + +help_string = """ +pmx genlib -h +usage: pmx [-h] [-f1 ipdb1] [-f2 ipdb2] [-o1 opdb1] [-o2 opdb2] + [--ffpath ffpath] [--fatp fatp] [--fnb fnb] [--moltype moltype] + [--noalign] [--cbeta] [--noH2Heavy] [--log log] + +The script creates hybrid structure and topology database entries (mtp and rtp) +in order to generate a pmx alchemical force field library. + +The easiest way to generate the library is to call this script from within +the folder of the force field you are interested in. + +If two pdb files (aligned on the backbone) are provided, the hybrid pdb, mtp, +and rtp files are written to file. If no pdb input file is provided, +the script uses pregenerated residues in order to build hybrid pdb, mtp, and +rtp files for all possible residue pairs, thus preparing the whole pmx ff +library. + +In addition, atomtype (-fatp) and non-bonded parameter (-fnm) files for the +introduced dummy atoms are generated. By default, these point towards the +files already present in the forcefield. In this way, the additional parameters +for the dummies are appended to the existing ff file, rather than being +written to new files. + +optional arguments: + -h, --help show this help message and exit + -f1 ipdb1 First input PDB file. Default is none provided. + -f2 ipdb2 Second input PDB file. Default is none provided. + -o1 opdb1 First output PDB file. Default is none provided. + -o2 opdb2 Second output PDB file. Default is none provided. 
+ --ffpath ffpath Path to mutation forcefield. Default is current folder. + --fatp fatp Atom types (atp) file. If the file is + present, data is appended to it, otherwise a new + file is created. Default is "atomtypes.atp". + --fnb fnb Non-bonded (nb) types file. If the file is + present, data is appended to it, otherwise a new + file is created. Default is "ffnonbonded.itp". + --moltype moltype The type of molecule for which the library is + being built. Available options are "protein", "dna", + or "rna". Default is "protein". + --noalign Whether to align the sidechains of the two + input PDB files provided. Default it True; this flag + sets it to False. + --cbeta Whether to morph sidechain between the two + residues or to use dummy atoms to (de)couple the + whole sidechain. By default, sidechain atoms are + morphed so to minimise the size of the perturbation. + With this flag set, whole sidechains are (de)coupled + instead; i.e. all atoms after C-beta are not mapped + between the two residues. + --noH2Heavy Whether to allow hydrogen to/from heavy atoms + morphing. Default is True, this flag sets it to False. + --log log Logging level. Either "info" or "debug". Default is "info". +""" diff --git a/icolos/core/workflow_steps/pmx/gentop.py b/icolos/core/workflow_steps/pmx/gentop.py new file mode 100644 index 0000000..52f8d53 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/gentop.py @@ -0,0 +1,46 @@ +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel + + +class StepPMXgentop(StepPMXBase, BaseModel): + """Fill hybrid topology with B states.""" + + def __init__(self, **data): + super().__init__(**data) + + def execute(self): + pass + + +help_string = """ +pmx gentop -h +usage: pmx [-h] [-p topol] [-o outfile] [-ff ff] [--split] [--scale_mass] + [--scale_dih SCALE_DIH] [--norecursive] + +This script fills in the B state to a topology file (itp or top) according to +the hybrid residues present in the file. If you provide a top file with +include statemets, by default the script will run through the included itp +files too; this can turned off using the --norecursive flag. You need to use +this script after having mutated a structure file with pmx mutate, and after +having passed that mutated structure through pdb2gmx. + +optional arguments: + -h, --help show this help message and exit + -p topol Input topology file (itp or top). Default is + "topol.top" + -o outfile Output topology file. Default is "pmxtop.top" + -ff ff Force field to use. If -p is a top file, it is not + necessary to specify the forcefield, as it will be + determined automatically. If -p is an itp file, then + -ff is needed, and if not provided a list of available + ff will be shown. + --split Write separate topologies for the vdW and charge + transformations. + --scale_mass Scale the masses of morphing atoms so that dummies + have a mass of 1. + --scale_dih SCALE_DIH + Scale the dihedrals that have a dummy. + --norecursive Whether to fill the B states also for all itp files + included in the provided topology file. Default is + True. This flag sets it to False. 
+""" diff --git a/icolos/core/workflow_steps/pmx/ligandHybrid.py b/icolos/core/workflow_steps/pmx/ligandHybrid.py new file mode 100644 index 0000000..3719b41 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/ligandHybrid.py @@ -0,0 +1,121 @@ +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel +import os +from icolos.utils.enums.program_parameters import PMXEnum, PMXAtomMappingEnum +from icolos.core.workflow_steps.step import _LE +import numpy as np + +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class StepPMXligandHybrid(StepPMXBase, BaseModel): + """Ligand alchemy: hybrid structure/topology.""" + + def __init__(self, **data): + super().__init__(**data) + + def _execute_command(self, args): + self._backend_executor.execute( + command=_PE.LIGANDHYBRID, arguments=args, check=True, location=self.work_dir + ) + + def _prepare_arguments(self, args, output_dir): + """ + Prepare the final set of arguments as a list, config overrides defaults + """ + prepared_args = [] + default_args = { + "-pairs": f"{output_dir}/pairs1.dat", + "-oA": f"{output_dir}/mergedA.pdb", + "-oB": f"{output_dir}/mergedB.pdb", + "-oitp": f"{output_dir}/merged.itp", + "-offitp": f"{output_dir}/ffmerged.itp", + "-log": f"{output_dir}/mapping.log", + } + for key, value in args.items(): + default_args[key] = value + + for key, value in self.settings.arguments.parameters.items(): + default_args[key] = value + + for key, value in default_args.items(): + prepared_args.append(key), + prepared_args.append(value) + + for flag in self.settings.arguments.flags: + prepared_args.append(flag) + return prepared_args + + def execute(self): + assert self.work_dir is not None and os.path.isdir(self.work_dir) + + edges = self.get_edges() + total_edges = len(edges) + for idx, edge in enumerate(edges): + progress = np.round(idx / total_edges * 100, 2) + self._logger.log( + f"Executing pmx ligandHybrid for edge {edge.get_edge_id()} - {progress}% complete", + _LE.DEBUG, + ) + lig1 = edge.get_source_node_name() + lig2 = edge.get_destination_node_name() + + arguments = { + "-i1": os.path.join( + self.work_dir, + _PAE.LIGAND_DIR, + lig1, + "MOL.pdb", + ), + "-i2": os.path.join( + self.work_dir, + _PAE.LIGAND_DIR, + lig2, + "MOL.pdb", + ), + "-itp1": os.path.join(self.work_dir, _PAE.LIGAND_DIR, lig1, "MOL.itp"), + "-itp2": os.path.join(self.work_dir, _PAE.LIGAND_DIR, lig2, "MOL.itp"), + } + # write output files the hybrodStrTop directory for each edge + output_dir = os.path.join( + self.work_dir, edge.get_edge_id(), _PE.HYBRID_STR_TOP + ) + arguments = self._prepare_arguments(args=arguments, output_dir=output_dir) + + self._execute_command(arguments) + + +help_string = """ +pmx ligandHybrid -h +usage: pmx [-h] [-i1 lig1.pdb] [-i2 lig2.pdb] [-itp1 lig1.itp] + [-itp2 lig2.itp] [-pairs pairs.dat] [-n1 scaffold1.ndx] + [-n2 scaffold2.ndx] [-oA mergedA.pdb] [-oB mergedB.pdb] + [-oitp merged.itp] [-offitp ffmerged.itp] [-log hybrid.log] + [--d 0.05] [--fit] [--split] [--scDUMm 1.0] [--scDUMa 1.0] + [--scDUMd 1.0] [--deAng] + +Provided two structures and topologies, build hybrid structure/topology. + +optional arguments: + -h, --help show this help message and exit + -i1 lig1.pdb Input ligand structure 1. Default is "lig1.pdb" + -i2 lig2.pdb Input ligand structure 2. Default is "lig2.pdb" + -itp1 lig1.itp Input ligand topology 1. Default is "lig1.itp" + -itp2 lig2.itp Input ligand topology 2. Default is "lig2.itp" + -pairs pairs.dat Optional input: atom pair mapping. 
+ -n1 scaffold1.ndx Optional input: index of atoms to consider for mol1 + -n2 scaffold2.ndx Optional input: index of atoms to consider for mol2 + -oA mergedA.pdb Output: hybrid structure based on the ligand 1. Default is "mergedA.pdb" + -oB mergedB.pdb Output: hybrid structure based on the ligand 2. Default is "mergedB.pdb" + -oitp merged.itp Output: hybrid topology. Default is "merged.itp" + -offitp ffmerged.itp Output: atomtypes for hybrid topology. Default is "ffmerged.itp" + -log hybrid.log Output: log file. Default is "hybrid.log" + --d 0.05 Optional: if -pairs not provided, distance (nm) between atoms to consider them morphable for alignment approach (default 0.05 nm). + --fit Fit mol2 onto mol1, only works if pairs.dat is provided + --split split the topology into separate transitions + --scDUMm 1.0 scale dummy masses using the counterpart atoms + --scDUMa 1.0 scale bonded dummy angle parameters + --scDUMd 1.0 scale bonded dummy dihedral parameters + --deAng decouple angles composed of 1 dummy and 2 non-dummies +""" diff --git a/icolos/core/workflow_steps/pmx/mutate.py b/icolos/core/workflow_steps/pmx/mutate.py new file mode 100644 index 0000000..d92bfbf --- /dev/null +++ b/icolos/core/workflow_steps/pmx/mutate.py @@ -0,0 +1,67 @@ +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel + + +class StepPMXmutate(StepPMXBase, BaseModel): + """Mutate protein or DNA/RNA.""" + + def __init__(self, **data): + super().__init__(**data) + + def execute(self): + pass + + +help_string = """ +pmx mutate -h +usage: pmx [-h] [-f infile] [-fB infileB] [-o outfile] [-ff ff] + [--script script] [--keep_resid | --ref ] [--resinfo] + +This script applies mutations of residues in a structure file for subsequent +free energy calculations. It supports mutations to protein, DNA, and RNA +molecules. + +The mutation information and dummy placements are taken from the hybrid residue +database "mutres.mtp". The best way to use this script is to take a pdb/gro file +that has been written with pdb2gmx with all hydrogen atoms present. + +By default, all residues are renumbered starting from 1, so to have unique +residue IDs. If you want to keep the original residue IDs, you can use the flag +--keep_resid. In this case, you will also need to provide chain information +in order to be able to mutate the desired residue. Alternatively, if you would +like to use the original residue IDs but these have been changed, e.g. by gromacs, +you can provide a reference PDB file (with chain information too) using the --ref +flag. The input structure will be mutated according to the IDs chosen for the +reference structure after having mapped the two residue indices. + +The program can either be executed interactively or via script. The script file +simply has to consist of "residue_id target_residue_name" pairs (just with some +space between the id and the name), or "chain_id residue_id target_residue_name" +if you are keeping the original residue IDs or providing a reference structure. + +The script uses an extended one-letter code for amino acids to account for +different protonation states. Use the --resinfo flag to print the dictionary. + +optional arguments: + -h, --help show this help message and exit + -f infile Input structure file in PDB or GRO format. Default is "protein.pdb" + -fB infileB Input structure file of the B state in PDB or GRO format (optional). + -o outfile Output structure file in PDB or GRO format. Default is "mutant.pdb" + -ff ff Force field to use. 
If none is provided, + a list of available ff will be shown. + --script script Text file with list of mutations (optional). + --keep_resid Whether to renumber all residues or to keep the + original residue IDs. By default, all residues are + renumbered so to have unique IDs. With this flags set, + the original IDs are kept. Because the IDs might not + be unique anymore, you will also be asked to choose + the chain ID where the residue you want to mutate is. + --ref Provide a reference PDB structure from which to map + the chain and residue IDs onto the file to be mutated (-f). + This can be useful when wanting to mutate a file that + has had its residues renumbered or the chain information + removed (e.g. after gmx grompp). As in the --keep_resid + option, if --ref is chosen, you will need to provide chain + information either interactively or via the --script flag. + --resinfo Show the list of 3-letter -> 1-letter residues +""" diff --git a/icolos/core/workflow_steps/pmx/prepare_simulations.py b/icolos/core/workflow_steps/pmx/prepare_simulations.py new file mode 100644 index 0000000..40b3320 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/prepare_simulations.py @@ -0,0 +1,51 @@ +from typing import Dict, List +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel +from icolos.utils.enums.program_parameters import PMXAtomMappingEnum, PMXEnum +from icolos.utils.execute_external.pmx import PMXExecutor +from icolos.utils.general.parallelization import SubtaskContainer + +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class StepPMXPrepareSimulations(StepPMXBase, BaseModel): + """ + Prepare the tpr file for either equilibration or production simulations + + Calls pmx util entrypoint prepare_simulations.py with + list of edges and the workdir path + """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=PMXExecutor) + + def execute(self): + + edges = self.get_edges() + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(edges) + self._execute_pmx_step_parallel( + run_func=self._execute_command, step_id="pmx_prepare_sims" + ) + + def _execute_command(self, edges: List, q: Dict): + arguments = { + "-edges": '"' + " ".join([e.get_edge_id() for e in edges]) + '"', + "-workPath": self.work_dir, + "-sim_type": self.settings.additional["sim_type"], + "-replicas": self.get_workflow_object().workflow_data.perturbation_map.replicas, + } + result = self._backend_executor.execute( + command=_PE.PREPARE_SIMULATIONS, + arguments=self.get_arguments(defaults=arguments), + check=True, + location=self.work_dir, + ) + + q[edges[0].get_edge_id()] = result.returncode diff --git a/icolos/core/workflow_steps/pmx/prepare_transitions.py b/icolos/core/workflow_steps/pmx/prepare_transitions.py new file mode 100644 index 0000000..ca36588 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/prepare_transitions.py @@ -0,0 +1,48 @@ +from icolos.utils.enums.logging_enums import LoggingConfigEnum +from typing import Dict, List +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel +from icolos.utils.enums.program_parameters import PMXAtomMappingEnum, PMXEnum +from icolos.utils.execute_external.pmx import PMXExecutor +from icolos.utils.general.parallelization import SubtaskContainer + +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() +_LE = LoggingConfigEnum() + + +class 
StepPMXPrepareTransitions(StepPMXBase, BaseModel): + """ + Executes the pmx prepare_transitions.py entrypoint + """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=PMXExecutor) + + def execute(self): + edges = self.get_edges() + + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(edges) + self._execute_pmx_step_parallel( + run_func=self._execute_command, step_id="pmx_prepare_transitions" + ) + + def _execute_command(self, edges: List, q: Dict): + args = { + "-edges": '"' + " ".join([e.get_edge_id() for e in edges]) + '"', + "-workPath": self.work_dir, + "-replicas": self.get_workflow_object().workflow_data.perturbation_map.replicas, + } + result = self._backend_executor.execute( + command=_PE.PREPARE_TRANSITIONS, + arguments=self.get_arguments(defaults=args), + check=True, + location=self.work_dir, + ) + q[edges[0].get_edge_id()] = result.returncode diff --git a/icolos/core/workflow_steps/pmx/run_analysis.py b/icolos/core/workflow_steps/pmx/run_analysis.py new file mode 100644 index 0000000..6609e2b --- /dev/null +++ b/icolos/core/workflow_steps/pmx/run_analysis.py @@ -0,0 +1,47 @@ +from typing import Dict, List +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel +from icolos.utils.enums.program_parameters import PMXAtomMappingEnum, PMXEnum +from icolos.utils.execute_external.pmx import PMXExecutor +from icolos.utils.general.parallelization import SubtaskContainer + +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class StepPMXRunAnalysis(StepPMXBase, BaseModel): + """ + Executes pmx run_analysis.py script + """ + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=PMXExecutor) + + def execute(self): + + edges = self.get_edges() + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(edges) + self._execute_pmx_step_parallel( + run_func=self._execute_command, step_id="pmx_run_analysis" + ) + + def _execute_command(self, edges: List, q: Dict): + + args = { + "-edges": '"' + " ".join([e.get_edge_id() for e in edges]) + '"', + "-workPath": self.work_dir, + "-replicas": self.get_workflow_object().workflow_data.perturbation_map.replicas, + } + result = self._backend_executor.execute( + command=_PE.RUN_ANALYSIS, + arguments=self.get_arguments(defaults=args), + check=True, + location=self.work_dir, + ) + q[edges[0].get_edge_id()] = result.returncode diff --git a/icolos/core/workflow_steps/pmx/run_simulations.py b/icolos/core/workflow_steps/pmx/run_simulations.py new file mode 100644 index 0000000..75613fb --- /dev/null +++ b/icolos/core/workflow_steps/pmx/run_simulations.py @@ -0,0 +1,58 @@ +from typing import Dict, List +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from pydantic import BaseModel +import numpy as np +from icolos.utils.enums.program_parameters import PMXAtomMappingEnum, PMXEnum +from icolos.utils.execute_external.pmx import PMXExecutor +from icolos.utils.general.parallelization import SubtaskContainer + +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class StepPMXRunSimulations(StepPMXBase, BaseModel): + """ + Calls pmx run_simulations entrypoint, handles parallel execution across multiple GPUs + """ + + def __init__(self, **data): + super().__init__(**data) 
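+ # batch-sizing sketch for execute() below: edges are spread evenly over + # the available cores/GPUs, e.g. 10 edges on 4 cores gives + # ceil(10 / 4) = 3 edges per sublist (illustrative numbers)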
+ + self._initialize_backend(executor=PMXExecutor) + + def execute(self): + + edges = self.get_edges() + # run everything through in one batch, with multiple edges per call + self.execution.parallelization.max_length_sublists = int( + np.ceil(len(edges) / self._get_number_cores()) + ) + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(edges) + self._execute_pmx_step_parallel( + run_func=self._execute_command, step_id="pmx_run_simulations" + ) + + def _execute_command(self, edges: List, q: Dict): + """ + Execute the simulations for a batch of edges, calling the pmx run_simulations entrypoint + """ + args = { + "-edges": '"' + " ".join([e.get_edge_id() for e in edges]) + '"', + "-workPath": self.work_dir, + "-sim_type": self.settings.additional["sim_type"], + "-replicas": self.get_workflow_object().workflow_data.perturbation_map.replicas, + } + for key, value in self.settings.arguments.parameters.items(): + args[key] = value + + result = self._backend_executor.execute( + command=_PE.RUN_SIMULATIONS, + arguments=self.get_arguments(defaults=args), + check=True, + location=self.work_dir, + ) + + q[edges[0].get_edge_id()] = result.returncode diff --git a/icolos/core/workflow_steps/pmx/setup_workpath.py b/icolos/core/workflow_steps/pmx/setup_workpath.py new file mode 100644 index 0000000..117c608 --- /dev/null +++ b/icolos/core/workflow_steps/pmx/setup_workpath.py @@ -0,0 +1,192 @@ +from icolos.core.containers.perturbation_map import Node +import os +from typing import Dict +from pydantic import BaseModel +from icolos.core.workflow_steps.pmx.base import StepPMXBase +from icolos.utils.enums.program_parameters import GromacsEnum +from icolos.utils.enums.step_enums import StepGromacsEnum +from icolos.utils.execute_external.execute import Executor +from icolos.utils.execute_external.gromacs import GromacsExecutor +from icolos.utils.general.parallelization import SubtaskContainer + +_GE = GromacsEnum() +_SGE = StepGromacsEnum() + + +class StepPMXSetup(StepPMXBase, BaseModel): + """ + Create the directory tree structure.
+ Requires the pmx workflow to be executed using the single_dir running mode. + Operates on the perturbation map object, runs acpype + on the written structures to produce the AMBER-compatible itp files + """ + + _gromacs_executor: GromacsExecutor = None + _antechamber_executor: Executor = None + + def __init__(self, **data): + super().__init__(**data) + self._gromacs_executor = GromacsExecutor(prefix_execution=_SGE.GROMACS_LOAD) + + def _separate_atomtypes(self, lig_path: str) -> None: + with open(os.path.join(lig_path, "MOL.itp"), "r") as f: + itp_lines = f.readlines() + + start_idx = self._get_line_idx(itp_lines, _GE.ATOMTYPES) + stop_index = self._get_line_idx(itp_lines, _GE.MOLECULETYPES) + + atomtype_lines = itp_lines[start_idx:stop_index] + cleaned_itp_lines = itp_lines[stop_index:] + with open(os.path.join(lig_path, "MOL.itp"), "w") as f: + f.writelines(cleaned_itp_lines) + + # process the atomtype lines to remove the bondtype column, + # which causes gmx to complain + cleaned_atomtype_lines = [] + for line in atomtype_lines: + parts = line.split() + if len(parts) > 5: + cleaned_parts = [parts[0]] + parts[2:] + ["\n"] + cleaned_atomtype_lines.append(" ".join(cleaned_parts)) + with open(os.path.join(lig_path, "ffMOL.itp"), "w") as f: + f.writelines(cleaned_atomtype_lines) + + def execute(self): + # sets the number of replicas to be used throughout the pmx run + replicas = ( + self.settings.additional["replicas"] + if "replicas" in self.settings.additional.keys() + else 3 + ) + assert self.work_dir is not None and os.path.isdir(self.work_dir) + self._construct_perturbation_map(self.work_dir, replicas) + # create the directory structure for subsequent calculations + edges = self.get_edges() + nodes = self.get_nodes() + + # create the input directory to sit at the top level of the workdir, contains ligands, + # mdp and protein topology files + os.makedirs(os.path.join(self.work_dir, "input"), exist_ok=True) + for folder in ["ligands", "mdp", "protein"]: + os.makedirs(os.path.join(self.work_dir, "input", folder), exist_ok=True) + + # handle protein parametrisation with pdb2gmx + protein = ( + self.get_workflow_object().workflow_data.perturbation_map.get_protein() + ) + protein.write(os.path.join(self.work_dir, "input/protein")) + + self._parametrise_protein(protein=protein.get_file_name(), path="input/protein") + + # remove the backup file + old_protein = [ + f + for f in os.listdir(os.path.join(self.work_dir, "input/protein")) + if f.endswith("#") + ] + # only want the parametrised processed pdb file in there + old_protein.append(protein.get_file_name()) + for f in old_protein: + os.remove(os.path.join(self.work_dir, "input/protein", f)) + + existing_itp_files = [ + f + for f in os.listdir(os.path.join(self.work_dir, "input/protein")) + if f.endswith("itp") and f.startswith("Protein") + ] + if ( + not existing_itp_files + ): # no protein itp files, we have a single chain that needs extracting from the top file + with open(os.path.join(self.work_dir, "input/protein/topol.top"), "r") as f: + top_lines = f.readlines() + + moltype_line = self._get_line_idx(top_lines, _GE.MOLECULETYPES) + + end_itp_line = self._get_line_idx(top_lines, "; Include water topology") + + moltype = top_lines[moltype_line + 2].split()[0] + cleaned_top = ( + top_lines[:moltype_line] + + [f'#include "topol_{moltype}.itp"\n'] + + top_lines[end_itp_line:] + ) + + itp_lines = top_lines[moltype_line:end_itp_line] + + with open(os.path.join(self.work_dir, "input/protein/topol.top"), "w") as f: + f.writelines(cleaned_top) + + with
open( + os.path.join(self.work_dir, f"input/protein/topol_{moltype}.itp"), "w" + ) as f: + f.writelines(itp_lines) + + mdp_dir = self.data.generic.get_argument_by_extension( + ext="mdp", rtn_file_object=True + ) + mdp_dir.write(os.path.join(self.work_dir, "input/mdp")) + + # parallelize the antechamber call across the pool of nodes + + self.execution.parallelization.max_length_sublists = 1 + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(nodes) + self._execute_pmx_step_parallel( + run_func=self._parametrise_nodes, step_id="pmx_setup" + ) + + # create the output folder structure + for edge in edges: + edgepath = os.path.join( + self.work_dir, + str(f"{edge.node_from.get_node_hash()}_{edge.node_to.get_node_hash()}"), + ) + hybridTopFolder = f"{edgepath}/hybridStrTop" + os.makedirs(hybridTopFolder, exist_ok=True) + + # water/protein + for wp in ["water", "protein"]: + wppath = f"{edgepath}/{wp}" + os.makedirs(wppath, exist_ok=True) + + # stateA/stateB + for state in ["stateA", "stateB"]: + statepath = f"{wppath}/{state}" + os.makedirs(statepath, exist_ok=True) + + # run1/run2/run3 + for r in range(1, replicas + 1): + runpath = f"{statepath}/run{r}" + os.makedirs(runpath, exist_ok=True) + + # em/eq/transitions + for sim in ["em", "eq", "transitions"]: + simpath = f"{runpath}/{sim}" + os.makedirs(simpath, exist_ok=True) + + # TODO: sort out nomenclature here + def _parametrise_nodes(self, edges: Node, q: Dict): + # because we use the base-class infrastructure to parallelize, arg names are awkward + # in this case, we parallelize over nodes, not edges! + if isinstance(edges, list): + node = edges[0] + else: + node = edges + lig_path = os.path.join(self.work_dir, "input", "ligands", node.get_node_hash()) + os.makedirs(lig_path, exist_ok=True) + node.conformer.write(os.path.join(lig_path, "MOL.pdb"), format_="pdb") + + # clean the written pdb, remove anything except hetatm/atom lines + self._clean_pdb_structure(lig_path) + # now run ACPYPE on the ligand to produce the topology file + self._parametrisation_pipeline(lig_path) + + # produces MOL.itp, need to separate the atomtypes directive out into ffMOL.itp for pmx + # to generate the forcefield later + self._separate_atomtypes(lig_path) + + # if we get through to here, return exit status 0 + + q[node.get_node_id()] = 0 diff --git a/icolos/core/workflow_steps/prediction/__init__.py b/icolos/core/workflow_steps/prediction/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/prediction/active_learning.py b/icolos/core/workflow_steps/prediction/active_learning.py new file mode 100644 index 0000000..0ff0621 --- /dev/null +++ b/icolos/core/workflow_steps/prediction/active_learning.py @@ -0,0 +1,267 @@ +from typing import List +import os +import random +import pickle + +from modAL.acquisition import max_EI +from modAL.models.learners import BayesianOptimizer +from pydantic.main import BaseModel + +from sklearn.gaussian_process.kernels import WhiteKernel, RBF +from sklearn.gaussian_process import GaussianProcessRegressor + +from icolos.core.containers.compound import Compound, Enumeration +from icolos.core.workflow_steps.step import StepBase +from icolos.core.workflow_steps.step import _LE +from icolos.utils.enums.step_enums import ( + StepBaseEnum, + StepGlideEnum, + StepActiveLearningEnum, +) + +from rdkit.Chem.AllChem import GetMorganFingerprintAsBitVect +from rdkit.Chem import PandasTools, Mol
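+ +# featurisation sketch used throughout this step: each molecule is encoded as +# a 2048-bit, radius-2 Morgan fingerprint, i.e. +# fp = np.array(GetMorganFingerprintAsBitVect(mol, 2, nBits=2048))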
+import pandas as pd +from pandas.core.frame import DataFrame +import numpy as np +from sklearn.metrics import mean_squared_error +from icolos.utils.enums.step_initialization_enum import StepInitializationEnum + +from icolos.utils.general.convenience_functions import nested_get + + +_SGE = StepGlideEnum() +_SALE = StepActiveLearningEnum() +_IE = StepInitializationEnum() + + +class StepActiveLearning(StepBase, BaseModel): + """ + Class to run an active learning framework + Primarily designed for building QSAR models using a physics-based method (embedding + docking) as an oracle + + Takes the step conf for the oracle as an additional argument. The step with these settings is run with the queried compounds at each stage of the active learning loop + """ + + def __init__(self, **data): + super().__init__(**data) + + def _initialize_oracle(self, compound_list: List[pd.Series]) -> List[StepBase]: + # list of step configs + base_oracle_config = self.settings.additional["oracle_config"] + oracle_steps = [] + for step in base_oracle_config: + oracle_steps.append(self._initialize_oracle_step_from_dict(step)) + + # manually attach the compound objects to the oracle's lead step + # subsequent steps should take their input from the first step. + for idx, compound in enumerate(compound_list): + cmp = Compound(compound_number=idx) + cmp.add_enumeration( + Enumeration( + compound_object=cmp, + smile=compound[_SALE.SMILES], + molecule=compound[_SALE.MOLECULE], + ) + ) + oracle_steps[0].data.compounds.append(cmp) + + return oracle_steps + + def query_oracle(self, compound_list: List[Mol]) -> List: + """ + Interface function to the oracle method; in the most likely case this is ligprep + docking + + Takes the requested compounds and runs them through the oracle workflow, returns the final compounds with annotations + + Notes: + This could be an arbitrarily complex workflow, but the only thing that's going to change is the compounds. + """ + # initialize the basic oracle, load the query compounds for evaluation + oracle_steps = self._initialize_oracle(compound_list) + # we have a fully initialized step with the compounds loaded.
Execute them + for idx, step in enumerate(oracle_steps): + # for subsequent steps we will need to read in from the previous one + if idx != 0: + step.generate_input() + step.execute() + step.process_write_out() + + # retrieve compounds from the final step + final_compounds = oracle_steps[-1].data.compounds + return final_compounds + + def _extract_final_scores( + self, compounds: List[Compound], criteria: str, highest_is_best: bool = False + ) -> List[float]: + """ + Takes a list of compound objects from the oracle and extracts the best score based on the provided criteria + """ + top_scores = [] + for comp in compounds: + scores = [] + for enum in comp.get_enumerations(): + for conf in enum.get_conformers(): + scores.append(float(conf._conformer.GetProp(criteria))) + + # if docking generated no conformers + # we probably want to filter these before the model sees them + if not scores: + scores.append(0.0) + + best_score = max(scores) if highest_is_best else min(scores) + top_scores.append(best_score) + + return top_scores + + def _generate_library(self) -> DataFrame: + """ + Loads the library file from disk + This should be a .sdf file with the pre-embedded compounds from a library enumeration or such + """ + lib_path = self.settings.additional[_SALE.VIRTUAL_LIB] + assert lib_path.endswith(".sdf") + + # hold the lib in a pandas df + library = PandasTools.LoadSDF( + lib_path, + smilesName=_SALE.SMILES, + molColName=_SALE.MOLECULE, + includeFingerprints=True, + removeHs=False, + embedProps=True, + ) + # need the morgan fingerprints in the df + library[_SALE.MORGAN_FP] = library.apply( + lambda x: np.array( + GetMorganFingerprintAsBitVect(x[_SALE.MOLECULE], 2, nBits=2048) + ), + axis=1, + ) + + return library + + def _prepare_initial_data(self, lib: pd.DataFrame): + initial_compound_idx = random.sample( + range(len(lib)), int(self.settings.additional[_SALE.INIT_SAMPLES]) + ) + data_rows = [lib.iloc[idx] for idx in initial_compound_idx] + # return annotated compound list + annotated_compounds = self.query_oracle(data_rows) + + # extract top score per compound + init_scores: List[float] = self._extract_final_scores( + annotated_compounds, criteria=_SGE.GLIDE_DOCKING_SCORE + ) + init_compounds = np.array([row[_SALE.MORGAN_FP] for row in data_rows]) + + return init_compounds, init_scores + + def _prepare_validation_data(self): + """ + Parses the sdf results file into a dataframe, extracts fingerprints + results + """ + val_lib = PandasTools.LoadSDF( + self.settings.additional[_SALE.VALIDATION_LIB], + smilesName=_SALE.SMILES, + molColName=_SALE.MOLECULE, + includeFingerprints=True, + removeHs=False, + embedProps=True, + ) + # need the morgan fingerprints in the df + val_lib[_SALE.MORGAN_FP] = val_lib.apply( + lambda x: np.array( + GetMorganFingerprintAsBitVect(x[_SALE.MOLECULE], 2, nBits=2048) + ), + axis=1, + ) + scores = list( + pd.to_numeric(val_lib[self.settings.additional[_SALE.CRITERIA]].fillna(0)) + ) + scores = [float(x) for x in scores] + return list(val_lib[_SALE.MORGAN_FP]), scores + + def _filter_oracle_results( + self, compound_rows: List[pd.Series], scores: List[float] + ): + final_compounds, final_scores = [], [] + for cmp, score in zip(compound_rows, scores): + if score != 0.0: + final_compounds.append(cmp) + final_scores.append(score) + + return final_compounds, final_scores + + def execute(self): + tmp_dir = self._make_tmpdir() + + # TODO: Implement committee model + + # start with sdf of pre-calculated ligand embeddings for each full peptide in the library + lib =
self._generate_library() + init_compounds, init_scores = self._prepare_initial_data(lib) + # load validation set for later + validation_compounds, validation_scores = self._prepare_validation_data() + + kernel = RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e3)) + WhiteKernel( + noise_level=1, noise_level_bounds=(1e-10, 1e2) + ) + learner = BayesianOptimizer( + # estimator=GaussianProcessRegressor(kernel=kernel), + estimator=GaussianProcessRegressor(kernel), + query_strategy=max_EI, + X_training=init_compounds, + y_training=init_scores, + ) + + for idx in range(int(self.settings.additional[_SALE.N_ROUNDS])): + # generate the requested points from the learner + query_idx, _ = learner.query( + list(lib[_SALE.MORGAN_FP]), + n_instances=int(self.settings.additional[_SALE.BATCH_SIZE]), + ) + # generate oracle input + query_compounds = [lib.iloc[int(idx)] for idx in query_idx] + # query oracle + + compounds = self.query_oracle(query_compounds) + scores = self._extract_final_scores( + compounds, self.settings.additional[_SALE.CRITERIA] + ) + # some of the scores will be zero if they didn't dock, do we want to filter these out, only hand back those compounds with a non-zero score? + query_compounds, scores = self._filter_oracle_results( + query_compounds, scores + ) + + learner.teach( + np.array([compound[_SALE.MORGAN_FP] for compound in query_compounds]), + scores, + ) + # need a held-out test set with docking scores already computed + performance = learner.score(validation_compounds, validation_scores) + self._logger.log( + f"Round {idx +1}; val set correlation: {performance}", _LE.INFO + ) + # get the predictions + predictions = learner.predict(validation_compounds) + mse = mean_squared_error(validation_scores, predictions) + self._logger.log(f"Round {idx+1}; rmse: {np.sqrt(mse)}", _LE.INFO) + + # pickle the final model + with open(os.path.join(tmp_dir, "model.pkl"), "wb") as f: + pickle.dump(learner, f) + + self._parse_output(tmp_dir) + + def _initialize_oracle_step_from_dict(self, step_conf: dict) -> StepBase: + # note this is a bit of a hack to get around a circular import, we can't use the main util + _STE = StepBaseEnum + step_type = nested_get(step_conf, _STE.STEP_TYPE, default=None) + step_type = None if step_type is None else step_type.upper() + if step_type in _IE.STEP_INIT_DICT.keys(): + return _IE.STEP_INIT_DICT[step_type](**step_conf) + else: + raise ValueError( + f"Backend for step {nested_get(step_conf, _STE.STEPID, '')} unknown." 
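+ # note: a minimal oracle step config is a dict in the usual Icolos step + # format, e.g. (hypothetical keys/values) + # {"step_id": "oracle_docking", "type": "glide", ...}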
+ ) diff --git a/icolos/core/workflow_steps/prediction/model_building.py b/icolos/core/workflow_steps/prediction/model_building.py new file mode 100644 index 0000000..8bd9161 --- /dev/null +++ b/icolos/core/workflow_steps/prediction/model_building.py @@ -0,0 +1,269 @@ +import json +import os +import numpy as np +import pandas as pd +from collections import OrderedDict +from copy import deepcopy +from typing import Tuple, List + +from pydantic import BaseModel + +from icolos.core.containers.compound import Conformer +from icolos.core.containers.generic import GenericData +from icolos.utils.enums.program_parameters import ModelBuilderEnum +from icolos.utils.enums.step_enums import StepModelBuilderEnum +from icolos.core.workflow_steps.io.base import StepIOBase +from icolos.core.workflow_steps.step import _LE, StepSettingsParameters +from icolos.utils.enums.write_out_enums import WriteOutEnum +from icolos.utils.execute_external.execute import Executor + +_SMBE = StepModelBuilderEnum() +_SME = ModelBuilderEnum() +_WE = WriteOutEnum() + + +class StepModelBuilder(StepIOBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + # initialize the executor + self._initialize_backend(executor=Executor) + + def _generate_temporary_input_output_files( + self, tmp_dir: str + ) -> Tuple[str, str, str, str, str]: + tmp_input_config_json = os.path.join(tmp_dir, _SMBE.TMP_INPUT_CONFIG) + tmp_input_data_csv = os.path.join(tmp_dir, _SMBE.TMP_INPUT_DATA) + tmp_output_best_model_pkl = os.path.join(tmp_dir, _SMBE.TMP_OUTPUT_BEST_MODEL) + tmp_output_best_parameters_json = os.path.join( + tmp_dir, _SMBE.TMP_OUTPUT_BEST_PARAMETERS + ) + tmp_output_production_pkl = os.path.join( + tmp_dir, _SMBE.TMP_OUTPUT_PRODUCTION_MODEL + ) + return ( + tmp_input_config_json, + tmp_input_data_csv, + tmp_output_best_model_pkl, + tmp_output_best_parameters_json, + tmp_output_production_pkl, + ) + + def _update_data_block( + self, conf: dict, tmp_input_data_csv: str, settings: StepSettingsParameters + ) -> dict: + # the user can specify additional things for the "data" block of the configuration + # in the "additional" field; the input CSV file needs to be overwritten in every case, though + specified_data_block = settings.additional.get(_SMBE.DATA, {}) + for key in specified_data_block.keys(): + conf[_SMBE.DATA][key] = specified_data_block[key] + conf[_SMBE.DATA][_SMBE.DATA_TRAININGSET_FILE] = tmp_input_data_csv + if _SMBE.DATA_TESTSET_FILE in conf[_SMBE.DATA].keys(): + conf[_SMBE.DATA].pop(_SMBE.DATA_TESTSET_FILE, None) + self._logger.log( + f"Removed test set specification, not supported yet.", _LE.WARNING + ) + return conf + + def _write_OptunaAZ_configuration( + self, + tmp_input_config_json: str, + tmp_input_data_csv: str, + settings: StepSettingsParameters, + ): + config_path = settings.arguments.parameters[_SME.CONFIG] + with open(config_path, "r") as file: + optunaaz_conf = file.read().replace("\r", "").replace("\n", "") + optunaaz_conf = json.loads(optunaaz_conf) + optunaaz_conf = self._update_data_block( + optunaaz_conf, tmp_input_data_csv, settings + ) + with open(tmp_input_config_json, "w") as file: + json.dump(optunaaz_conf, fp=file, indent=4) + self._logger.log( + f"Wrote updated OptunaAZ configuration file to {tmp_input_config_json}.", + _LE.DEBUG, + ) + + def _write_input_csv( + self, + conformers: List[Conformer], + tmp_input_data_csv: str, + settings: StepSettingsParameters, + ): + def _get_tag(conformer: Conformer, tag: str) -> str: + try: + value = conformer.get_molecule().GetProp(tag).strip() 
+ except KeyError: + value = np.nan + return value + + smiles_column = settings.additional[_SMBE.DATA][_SMBE.DATA_INPUT_COLUMN] + response_column = settings.additional[_SMBE.DATA][_SMBE.DATA_RESPONSE_COLUMN] + + # initialize the dictionary + dict_result = OrderedDict() + dict_result[_WE.RDKIT_NAME] = ["" for _ in range(len(conformers))] + dict_result[smiles_column] = ["" for _ in range(len(conformers))] + dict_result[response_column] = ["" for _ in range(len(conformers))] + + # populate the dictionary with the values + for irow in range(len(conformers)): + conf = conformers[irow] + dict_result[_WE.RDKIT_NAME][irow] = conf.get_index_string() + dict_result[smiles_column][irow] = _get_tag(conf, smiles_column) + dict_result[response_column][irow] = _get_tag(conf, response_column) + + # do the writeout (after sanitation) + df_result = pd.DataFrame.from_dict(dict_result) + df_result.to_csv( + path_or_buf=tmp_input_data_csv, + sep=",", + na_rep="", + header=True, + index=False, + mode="w", + quoting=None, + ) + + def _get_arguments( + self, + tmp_input_config_json: str, + tmp_output_best_model_pkl: str, + tmp_output_best_parameters_json: str, + tmp_output_production_pkl: str, + ) -> List[str]: + arguments = [ + _SME.CONFIG, + tmp_input_config_json, + _SME.MERGED_MODEL_OUTPATH, + tmp_output_production_pkl, + _SME.BEST_MODEL_OUTPATH, + tmp_output_best_model_pkl, + _SME.BEST_BUILDCONFIG_OUTPATH, + tmp_output_best_parameters_json, + ] + return arguments + + def _parse_output( + self, + tmp_input_config_json: str, + tmp_input_data_csv: str, + tmp_output_best_parameters_json: str, + tmp_output_production_pkl: str, + ): + # loading the final model is crucial (and the end-artifact for this step) + try: + with open(tmp_output_production_pkl, "rb") as f: + data = f.read() + self.data.generic.add_file( + GenericData( + file_name=_SMBE.TMP_OUTPUT_PRODUCTION_MODEL, file_data=data + ) + ) + except FileNotFoundError as e: + self._logger.log( + f"Could not load production model from path {tmp_output_production_pkl}.", + _LE.ERROR, + ) + raise e + + # loading the JSON with the best hyper-parameter configuration + try: + with open(tmp_output_best_parameters_json, "r") as f: + data = f.read().replace("\r", "").replace("\n", "") + data = json.loads(data) + self.data.generic.add_file( + GenericData( + file_name=_SMBE.TMP_OUTPUT_BEST_PARAMETERS, file_data=data + ) + ) + except FileNotFoundError as e: + self._logger.log( + f"Could not load best hyper-parameter configuration from path {tmp_output_best_parameters_json}.", + _LE.WARNING, + ) + + # loading the input JSON for OptunaAZ + try: + with open(tmp_input_config_json, "r") as f: + data = f.read() + self.data.generic.add_file( + GenericData(file_name=_SMBE.TMP_INPUT_CONFIG, file_data=data) + ) + except FileNotFoundError as e: + self._logger.log( + f"Could not load OptunaAZ input configuration from path {tmp_input_config_json}.", + _LE.WARNING, + ) + + # loading the input CSV + try: + with open(tmp_input_data_csv, "r") as f: + data = f.read() + self.data.generic.add_file( + GenericData(file_name=_SMBE.TMP_INPUT_DATA, file_data=data) + ) + except FileNotFoundError as e: + self._logger.log( + f"Could not load input CSV file from path {tmp_input_data_csv}.", + _LE.WARNING, + ) + + def execute(self): + # make a copy of the settings to avoid side-effects with the dictionaries + settings = deepcopy(self.settings) + + # generate temporary files + tmp_dir = self._move_to_temp_dir() + ( + tmp_input_config_json, + tmp_input_data_csv, + tmp_output_best_model_pkl,
tmp_output_best_parameters_json, + tmp_output_production_pkl, + ) = self._generate_temporary_input_output_files(tmp_dir) + + # write OptunaAZ configuration to file + self._write_OptunaAZ_configuration( + tmp_input_config_json=tmp_input_config_json, + tmp_input_data_csv=tmp_input_data_csv, + settings=settings, + ) + + # unroll all conformers + all_conformers = [] + for compound in self.get_compounds(): + for enumeration in compound: + all_conformers = all_conformers + enumeration.get_conformers() + + # write input CSV, derived from the conformers + self._write_input_csv( + conformers=all_conformers, + tmp_input_data_csv=tmp_input_data_csv, + settings=settings, + ) + + # execute OptunaAZ + self._backend_executor.execute( + command=_SME.OPTBUILD_ENTRY_POINT, + arguments=self._get_arguments( + tmp_input_config_json=tmp_input_config_json, + tmp_output_best_model_pkl=tmp_output_best_model_pkl, + tmp_output_best_parameters_json=tmp_output_best_parameters_json, + tmp_output_production_pkl=tmp_output_production_pkl, + ), + check=False, + ) + + # parse the output + self._parse_output( + tmp_input_config_json=tmp_input_config_json, + tmp_input_data_csv=tmp_input_data_csv, + tmp_output_best_parameters_json=tmp_output_best_parameters_json, + tmp_output_production_pkl=tmp_output_production_pkl, + ) + + # clean-up + self._restore_working_dir() + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/prediction/predictor.py b/icolos/core/workflow_steps/prediction/predictor.py new file mode 100644 index 0000000..0a80043 --- /dev/null +++ b/icolos/core/workflow_steps/prediction/predictor.py @@ -0,0 +1,86 @@ +import pickle +from copy import deepcopy + +import numpy as np +from typing import List + +from pydantic import BaseModel +from rdkit import Chem + +from icolos.utils.general.icolos_exceptions import StepFailed, get_exception_message +from icolos.utils.enums.step_enums import StepPredictorEnum +from icolos.core.workflow_steps.io.base import StepIOBase +from icolos.core.workflow_steps.step import _LE + +from icolos.utils.general.convenience_functions import * + +_SPE = StepPredictorEnum() + + +class StepPredictor(StepIOBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + @classmethod + def _load_scikit_model(cls, model_path: str): + with open(model_path, "rb") as f: + scikit_model = pickle.load(f) + return scikit_model + + def _get_feature_values( + self, conformer: Chem.Mol, feature_names: List[str] + ) -> np.ndarray: + list_values = [] + for feature in feature_names: + try: + list_values.append(float(conformer.GetProp(feature))) + except KeyError as e: + self._logger.log( + f"Could not find feature / property, error message: {get_exception_message(e)}", + _LE.ERROR, + ) + raise e + + # cast list to 2D array + return np.array([list_values]) + + def execute(self): + # get parameters + parameters = deepcopy(self.settings.additional) + model_path = nested_get(parameters, _SPE.MODEL_PATH, default=None) + feature_names = nested_get(parameters, _SPE.FEATURES, default=None) + name_predicted = nested_get( + parameters, _SPE.NAME_PREDICTED, default=_SPE.NAME_PREDICTED_DEFAULT + ) + + # check parameters; model_path and features are mandatory + if model_path is None or feature_names is None: + message = f"Parameters {_SPE.MODEL_PATH} (path to model) and {_SPE.FEATURES} (list with features) have to be set - abort." 
+ self._logger.log(message, _LE.ERROR) + raise StepFailed(message) + if name_predicted == _SPE.NAME_PREDICTED_DEFAULT: + self._logger.log( + f"Name of predicted property not specified, using default value {_SPE.NAME_PREDICTED_DEFAULT} instead (not recommended).", + _LE.WARNING, + ) + + # load model from file and predict endpoint + model = self._load_scikit_model(model_path=model_path) + predicted = 0 + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + for conformer in enumeration.get_conformers(): + if not self._input_object_valid(conformer): + continue + + f_values = self._get_feature_values( + conformer=conformer.get_molecule(), feature_names=feature_names + ) + conformer.get_molecule().SetProp( + name_predicted, str(model.predict(X=f_values)[0]) + ) + predicted += 1 + self._logger.log( + f"Predicted {name_predicted} for {predicted} conformers in {len(self.get_compounds())} compounds.", + _LE.INFO, + ) diff --git a/icolos/core/workflow_steps/schrodinger/__init__.py b/icolos/core/workflow_steps/schrodinger/__init__.py new file mode 100644 index 0000000..010d3cc --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/__init__.py @@ -0,0 +1,9 @@ +from icolos.core.workflow_steps.schrodinger.prime import StepPrime +from icolos.core.workflow_steps.schrodinger.macromodel import StepMacromodel +from icolos.core.workflow_steps.schrodinger.ligprep import StepLigprep +from icolos.core.workflow_steps.schrodinger.glide import StepGlide +from icolos.core.workflow_steps.schrodinger.prepwizard import StepPrepwizard +from icolos.core.workflow_steps.schrodinger.fep_plus_setup import StepFepPlusSetup +from icolos.core.workflow_steps.schrodinger.fep_plus_execution import StepFepPlusExec +from icolos.core.workflow_steps.schrodinger.desmond_preprocessor import StepDesmondSetup +from icolos.core.workflow_steps.schrodinger.desmond_exec import StepDesmondExec diff --git a/icolos/core/workflow_steps/schrodinger/base.py b/icolos/core/workflow_steps/schrodinger/base.py new file mode 100644 index 0000000..7a1ba8f --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/base.py @@ -0,0 +1,366 @@ +import os +from typing import Optional, Iterable, Union +from pydantic import BaseModel +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum +from icolos.utils.execute_external.license_token_guard import ( + TokenGuardParameters, + SchrodingerLicenseTokenGuard, +) +from icolos.utils.general.files_paths import attach_root_path +from icolos.utils.execute_external.schrodinger import SchrodingerExecutor +from icolos.utils.enums.step_enums import StepDesmondEnum +from icolos.core.workflow_steps.step import _LE +import re +from shutil import copy +from typing import Dict + + +_EE = SchrodingerExecutablesEnum() +_SDE = StepDesmondEnum() + + +class StepSchrodingerBase(StepBase, BaseModel): + + token_guard: Optional[TokenGuardParameters] = None + + def __init__(self, **data): + super().__init__(**data) + + def _apply_token_guard(self): + if self.token_guard is not None: + token_guard = SchrodingerLicenseTokenGuard(token_guard=self.token_guard) + token_guard.guard() + + # TODO: Deprecated - use self.converter + def _translate_SDF_to_MAE( + self, sdf_path: str, mae_path: str, executor: SchrodingerExecutor + ): + """As "Glide" is only able to read MAE (Maestro) files, write the ligands out in that format.""" + + # call "sdconvert" from Schrodinger's software + arguments = [ + "".join([_EE.SDCONVERT_I, 
_EE.SDCONVERT_FORMAT_SD]),
+            sdf_path,
+            "".join([_EE.SDCONVERT_O, _EE.SDCONVERT_FORMAT_MAE]),
+            mae_path,
+        ]
+        execution_result = executor.execute(
+            command=_EE.SDCONVERT, arguments=arguments, check=True
+        )
+
+    def _translate_MAE_to_SDF(
+        self, mae_path: str, sdf_path: str, executor: SchrodingerExecutor
+    ):
+        """In cases where the write-out mode for Glide is not producing SDF files."""
+
+        # call "sdconvert" from Schrodinger's software
+        arguments = [
+            "".join([_EE.SDCONVERT_I, _EE.SDCONVERT_FORMAT_MAE]),
+            mae_path,
+            "".join([_EE.SDCONVERT_O, _EE.SDCONVERT_FORMAT_SD]),
+            sdf_path,
+        ]
+        execution_result = executor.execute(
+            command=_EE.SDCONVERT, arguments=arguments, check=True
+        )
+
+    def _translate_PDB_to_MAE(
+        self, pdb_path: str, mae_path: str, executor: SchrodingerExecutor
+    ):
+        """Translate a PDB structure (e.g. a receptor) into MAE format."""
+
+        # call "structconvert" from Schrodinger's software
+        arguments = [
+            "".join([_EE.SDCONVERT_I, _EE.STRUCTCAT_FORMAT_PDB]),
+            pdb_path,
+            "".join([_EE.SDCONVERT_O, _EE.SDCONVERT_FORMAT_MAE]),
+            mae_path,
+        ]
+        execution_result = executor.execute(
+            command=_EE.STRUCTCONVERT, arguments=arguments, check=True
+        )
+
+    def _replace_config_value(self, key, value, config):
+        value = str(value)
+        pattern = fr"({key} =).*"
+        pattern = re.compile(pattern)
+        config = re.sub(pattern, fr"\1 {value}", config)
+        return config
+
+    def _get_template(self, file_name):
+        file = [
+            file
+            for file in os.listdir(attach_root_path("icolos/config/desmond"))
+            if file_name in file
+        ]
+        assert len(file) == 1
+        return file[0]
+
+    def _write_config(self, tmp_dir, dict_: Dict, file_name):
+        # see if a config file was specified; if so, assume no further changes:
+        if _SDE.CONFIG in dict_.keys() and dict_[_SDE.CONFIG] is not None:
+            copy(dict_[_SDE.CONFIG], tmp_dir)
+        else:
+            template = self._get_template(file_name)
+            with open(attach_root_path(f"icolos/config/desmond/{template}"), "r") as f:
+                config = f.read()
+            for k, v in dict_.items():
+                config = self._replace_config_value(k, v, config)
+
+            self._logger.log(f"Compiled file {file_name}...", _LE.DEBUG)
+            for line in config.split("\n"):
+                self._logger_blank.log(line, _LE.DEBUG)
+            with open(os.path.join(tmp_dir, file_name), "w") as f:
+                f.write(config)
+
+    def _parse_arguments(self, defaults):
+        args = []
+
+        for flag in self.settings.arguments.flags:
+            args.append(flag)
+        if "-WAIT" not in args:
+            args.append("-WAIT")
+        for k, v in self.settings.arguments.parameters.items():
+            args.append(k)
+            args.append(v)
+        for k, v in defaults.items():
+            if k not in args:
+                args.append(k)
+                args.append(v)
+        return args
+
+    @staticmethod
+    def _parse_maestro_in_file(
+        lines: Iterable[str],
+    ) -> Dict[str, Union[str, Dict[str, str]]]:
+        """Parses Maestro input, and returns a keywords dict for it."""
+
+        separator3 = "   "  # three spaces separate keyword and value
+        indent4 = "    "  # four spaces mark lines belonging to a block
+        block_starters = {
+            "[CONSTRAINT_GROUP",
+            "[FEATURE",
+        }
+
+        # All Glide keywords. Get all keywords with:
+        # $ module load schrodinger
+        # $ glide -docking-keywords | cut -d' ' -f1 | sed 's/.*/"&"/' | paste -sd , -
+        # List keywords, get first word, wrap in quotes, join lines.
+ # See: + # - https://stackoverflow.com/a/19145499 + # - https://unix.stackexchange.com/a/251362 + allowed_keywords = { + "AMIDE_MODE", + "AMIDE_TRANS_ALL", + "AMIDE_TRANSTOL", + "ASL_RES_INTERACTION", + "CALC_INPUT_RMS", + "CANONICALIZE", + "COMPRESS_POSES", + "CORE_ATOMS", + "CORE_DEFINITION", + "CORE_FILTER", + "CORE_POS_MAX_RMSD", + "CORE_RESTRAIN", + "CORE_RESTRAIN_V", + "CORE_SMARTS", + "CORE_SNAP", + "CORECONS_FALLBACK", + "CSV_PROPS_FILE", + "CV_CUTOFF", + "DIELMOD", + "DOCKING_METHOD", + "DOINTRA", + "DOINTRA_SCALE", + "DSCORE_CUTOFF", + "EPIK_PENALTIES", + "EXPANDED_SAMPLING", + "FITDEN", + "FORCEFIELD", + "FORCEPLANAR", + "GLIDE_CONFGEN_BADDIST2", + "GLIDE_CONFGEN_EFCUT", + "GLIDE_CONS_FEAT_FILE", + "GLIDE_CONS_FINALONLY", + "GLIDE_CONS_RMETCOORD", + "GLIDE_CONS_RNOEMAX", + "GLIDE_CONS_RNOEMIN", + "GLIDE_CONS_RPOS", + "GLIDE_CONS_XMETCOORD", + "GLIDE_CONS_XNOE", + "GLIDE_CONS_XPOS", + "GLIDE_CONS_YMETCOORD", + "GLIDE_CONS_YNOE", + "GLIDE_CONS_YPOS", + "GLIDE_CONS_ZMETCOORD", + "GLIDE_CONS_ZNOE", + "GLIDE_CONS_ZPOS", + "GLIDE_DIELCO", + "GLIDE_ELEMENTS", + "GLIDE_EXVOL_PENAL_NUM", + "GLIDE_EXVOL_PENAL_STRENGTH", + "GLIDE_NTOTALCONS", + "GLIDE_NUMEXVOL", + "GLIDE_NUMMETCOORDCONS", + "GLIDE_NUMMETCOORDSITES", + "GLIDE_NUMNOECONS", + "GLIDE_NUMPOSITCONS", + "GLIDE_NUMUSEXVOL", + "GLIDE_OUTPUT_USEHTOR", + "GLIDE_REFLIG_FORMAT", + "GLIDE_REXVOL", + "GLIDE_REXVOLIN", + "GLIDE_TORCONS_ALLBONDS", + "GLIDE_TORCONS_IATOMS", + "GLIDE_TORCONS_JATOMS", + "GLIDE_TORCONS_KATOMS", + "GLIDE_TORCONS_LATOMS", + "GLIDE_TORCONS_PATTERN_INDEX", + "GLIDE_TORCONS_PATTERNS", + "GLIDE_TORCONS_SETVAL", + "GLIDE_TORCONS_VALUES", + "GLIDE_TORCONSFILE", + "GLIDE_XEXVOL", + "GLIDE_XP_NMAXCORE", + "GLIDE_XP_RMSCUT", + "GLIDE_YEXVOL", + "GLIDE_ZEXVOL", + "GLIDECONS", + "GLIDECONSFEATATOMS", + "GLIDECONSFEATHASINCLUDE", + "GLIDECONSFEATINCLUDE", + "GLIDECONSFEATINDEX", + "GLIDECONSFEATPATTERNS", + "GLIDECONSGROUPNREQUIRED", + "GLIDECONSNAMES", + "GLIDECONSUSEMET", + "GLIDESCORUSEMET", + "GLIDEUSEALLEXVOL", + "GLIDEUSECONSFEAT", + "GLIDEUSECONSFEATINDEX", + "GLIDEUSECONSGROUPINDEX", + "GLIDEUSECONSLABELS", + "GLIDEUSEXVOL", + "GLIDEUSEXVOLNAMES", + "GLIDEXVOLNAMES", + "GRIDFILE", + "GSCORE", + "GSCORE_CUTOFF", + "HAVEGLIDECONSFEAT", + "HBOND_ACCEP_HALO", + "HBOND_CUTOFF", + "HBOND_DONOR_AROMH", + "HBOND_DONOR_AROMH_CHARGE", + "HBOND_DONOR_HALO", + "INCLUDE_INPUT_CONF", + "INCLUDE_INPUT_RINGS", + "JOBNAME", + "KEEP_SUBJOB_POSES", + "KEEPRAW", + "KEEPSKIPPED", + "LIG_CCUT", + "LIG_MAECHARGES", + "LIG_VSCALE", + "LIGAND_END", + "LIGAND_START", + "LIGANDFILE", + "LIGANDFILES", + "LIGFORMAT", + "LIGPREP", + "LIGPREP_ARGS", + "MACROCYCLE", + "MACROCYCLE_OPTIONS", + "MAX_ITERATIONS", + "MAXATOMS", + "MAXKEEP", + "MAXREF", + "MAXROTBONDS", + "METAL_CUTOFF", + "NENHANCED_SAMPLING", + "NMAXRMSSYM", + "NOSORT", + "NREPORT", + "NREQUIRED_CONS", + "OUTPUTDIR", + "PAIRDISTANCES", + "PEPTIDE", + "PHASE_DB", + "PHASE_NCONFS", + "PHASE_SUBSET", + "POSE_DISPLACEMENT", + "POSE_HTORSION", + "POSE_OUTTYPE", + "POSE_RMSD", + "POSES_PER_LIG", + "POSTDOCK", + "POSTDOCK_ITMAX", + "POSTDOCK_NPOSE", + "POSTDOCK_SCITMAX", + "POSTDOCK_XP_DELE", + "POSTDOCKCG", + "POSTDOCKLIGMIN", + "POSTDOCKSTRAIN", + "PRECISION", + "PREMIN", + "PREMINCG", + "PREMINELEC", + "PREMINITMAX", + "RADIUS_RES_INTERACTION", + "REF_LIGAND_FILE", + "REFINDEX", + "REPORT_CPU_TIME", + "REWARD_INTRA_HBONDS", + "RINGCONFCUT", + "RINGONFLY", + "SAMPLE_N_INVERSIONS", + "SAMPLE_RINGS", + "SCORE_INPUT_POSE", + "SCORE_MINIMIZED_INPUT_POSE", + "SCORING_CUTOFF", + "SHAPE_ATOMS", 
+ "SHAPE_RESTRAIN", + "SHAPE_TYPING", + "SKIP_EPIK_METAL_ONLY", + "STRAIN_GSFACTOR", + "STRAIN_GSTHRESH", + "STRAINELEC", + "SUBSTRATE_PENAL_FILE", + "USE_CONS", + "USE_REF_LIGAND", + "USECOMPMAE", + "WRITE_CSV", + "WRITE_RES_INTERACTION", + "WRITE_TIMINGS_CSV", + "WRITE_XP_DESC", + "WRITEREPT", + } + + result = {} + current_block = None + for linenum, line in enumerate(lines): + if any(line.startswith(starter) for starter in block_starters): + # Block start. + current_block = line.strip() + result[current_block] = {} + elif line.strip() == "": + # Empty line: close current block if any is open, and skip the line. + current_block = None + elif line.startswith(indent4): + # Indented line inside the block. + if current_block is None: + raise ValueError( + f"Unexpected indent outside of block for line {linenum}: {line}" + ) + kw, value = line.strip().split(sep=separator3, maxsplit=1) + result[current_block][kw] = value.strip('"') + elif any(line.startswith(kw) for kw in allowed_keywords): + # Ordinary keywords. + kw, value = line.strip().split(sep=separator3, maxsplit=1) + result[kw] = value + else: + raise ValueError( + f"Unexpected line {linenum} in maestro input file: {line}" + ) + + return result diff --git a/icolos/core/workflow_steps/schrodinger/desmond_exec.py b/icolos/core/workflow_steps/schrodinger/desmond_exec.py new file mode 100644 index 0000000..76f5858 --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/desmond_exec.py @@ -0,0 +1,114 @@ +import os +from icolos.core.step_utils.structconvert import StructConvert +from pydantic import BaseModel +from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase +from icolos.utils.execute_external.schrodinger import SchrodingerExecutor +from icolos.utils.enums.step_enums import StepDesmondEnum +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum + +_SDE = StepDesmondEnum() +_SEE = SchrodingerExecutablesEnum() + + +class StepDesmondExec(StepSchrodingerBase, BaseModel): + """ + Executes a full Desmond multisim workflow + """ + + class Config: + underscore_attrs_are_private = True + arbitrary_types_allowed = True + + _struct_converter: StructConvert = None + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(SchrodingerExecutor) + self._check_backend_availability() + self._struct_converter = StructConvert( + binary_location=self.execution.binary_location, + prefix_execution=self.execution.prefix_execution, + ) + + def execute(self): + # takes in the cms file from the preprocessor and runs the full multisim workflow on it + tmp_dir = self._make_tmpdir() + structure = self.data.generic.get_argument_by_extension( + "pdb", rtn_file_object=True + ) + pdb_id = structure.get_file_name() + structure.write(tmp_dir) + # convert the pdb file to mae + self._struct_converter.pdb2mae( + os.path.join(tmp_dir, pdb_id), + os.path.join(tmp_dir, "desmond_md_job_1.mae"), + ) + + preprocess_defaults = { + "-HOST": "localhost", + "-JOBNAME": "desmond_md_job_1", + "-m": "config.msj desmond_md_job_1.mae", + "-o": "setup.cms", + } + arguments = self._parse_arguments(preprocess_defaults) + # compile and write the msj to the tmpdir + config_dict = ( + self.settings.additional[_SDE.SETUP_MSJ_FIELDS] + if _SDE.SETUP_MSJ_FIELDS in self.settings.additional.keys() + else {} + ) + + self._write_config(tmp_dir, dict_=config_dict, file_name=_SDE.PREPROCESS_MSJ) + + # execute + self._backend_executor.execute( + command=_SEE.MULTISIM_EXEC, + arguments=arguments, + check=True, + location=tmp_dir, + 
) + + exec_defaults = { + "-HOST": "localhost", + "-JOBNAME": "desmond_production", + "-maxjob": "1", + "-cpu": "1", + "-m": _SDE.PRODUCTION_MSJ, + "-c": _SDE.PRODUCTION_CFG, + "-description": '"Molecular Dynamics" setup.cms', + "-mode": "umbrella", + "-PROJ": tmp_dir, + "-o": "out.cms", + "-lic": _SDE.TOKEN_STR, + } + + msj_config_dict = ( + self.settings.additional[_SDE.MSJ_FIELDS] + if _SDE.MSJ_FIELDS in self.settings.additional.keys() + else {} + ) + + cfg_config_dict = ( + self.settings.additional[_SDE.CFG_FIELDS] + if _SDE.CFG_FIELDS in self.settings.additional.keys() + else {} + ) + + # write the config files: msj for the full workflow, and a cfg for the production sim + self._write_config(tmp_dir, msj_config_dict, _SDE.PRODUCTION_MSJ) + + self._write_config(tmp_dir, cfg_config_dict, _SDE.PRODUCTION_CFG) + + arguments = self._parse_arguments(exec_defaults) + + self._backend_executor.execute( + command=_SEE.MULTISIM_EXEC, + arguments=arguments, + check=True, + location=tmp_dir, + ) + + self._parse_output(tmp_dir) + + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/schrodinger/desmond_preprocessor.py b/icolos/core/workflow_steps/schrodinger/desmond_preprocessor.py new file mode 100644 index 0000000..eba9eed --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/desmond_preprocessor.py @@ -0,0 +1,75 @@ +import os +from icolos.core.step_utils.structconvert import StructConvert +from pydantic import BaseModel +from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase +from icolos.utils.execute_external.schrodinger import SchrodingerExecutor +from icolos.utils.enums.step_enums import StepDesmondEnum +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum + +_SDE = StepDesmondEnum() +_SEE = SchrodingerExecutablesEnum() + + +class StepDesmondSetup(StepSchrodingerBase, BaseModel): + """ + Run preprocessing step to generate system for Desmond simulation + """ + + _struct_converter: StructConvert = None + + class Config: + underscore_attrs_are_private = True + arbitrary_types_allowed = True + + def __init__(self, **data): + super().__init__(**data) + self._initialize_backend(SchrodingerExecutor) + self._check_backend_availability() + self._struct_converter = StructConvert( + binary_location=self.execution.binary_location, + prefix_execution=self.execution.prefix_execution, + ) + + def execute(self): + tmp_dir = self._make_tmpdir() + + # need to take a structure file, possibly preprocess if pdb + # get the structure file and extract the file name + structure = self.data.generic.get_argument_by_extension( + "pdb", rtn_file_object=True + ) + pdb_id = structure.get_file_name() + structure.write(tmp_dir) + # convert the pdb file to mae + + self._struct_converter.pdb2mae( + os.path.join(tmp_dir, pdb_id), + os.path.join(tmp_dir, "desmond_md_job_1.mae"), + ) + + defaults = { + "-HOST": "localhost", + "-JOBNAME": "desmond_md_job_1", + "-m": "config.msj desmond_md_job_1.mae", + "-o": "setup.cms", + } + arguments = self._parse_arguments(defaults) + # compile and write the msj to the tmpdir + config_dict = ( + self.settings.additional[_SDE.MSJ_FIELDS] + if _SDE.MSJ_FIELDS in self.settings.additional.keys() + else {} + ) + + self._write_config(tmp_dir, dict_=config_dict, file_name=_SDE.PREPROCESS_MSJ) + + # execute + self._backend_executor.execute( + command=_SEE.MULTISIM_EXEC, + arguments=arguments, + check=True, + location=tmp_dir, + ) + self._parse_output(tmp_dir) + + self._remove_temporary(tmp_dir) diff --git 
a/icolos/core/workflow_steps/schrodinger/fep_analysis.py b/icolos/core/workflow_steps/schrodinger/fep_analysis.py new file mode 100644 index 0000000..5d93f83 --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/fep_analysis.py @@ -0,0 +1,27 @@ +from icolos.core.workflow_steps.schrodinger.fep_base import StepFEPBase +from icolos.utils.enums.step_enums import StepBaseEnum, StepFepPlusEnum +from icolos.utils.enums.program_parameters import FepPlusEnum + +from pydantic import BaseModel + +_FE = FepPlusEnum() +_SFE = StepFepPlusEnum() +_SBE = StepBaseEnum + + +class StepFepPlusAnalysis(StepFEPBase, BaseModel): + """ + Standalone class to analyse data from a previous fep job + """ + + def __init__(self, **data): + super().__init__(**data) + + def execute(self): + """ + Analyses the map produced from an FEP run + """ + tmp_dir = self._make_tmpdir() + self.data.generic.write_out_all_files(tmp_dir) + self._extract_log_file_data(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/schrodinger/fep_base.py b/icolos/core/workflow_steps/schrodinger/fep_base.py new file mode 100644 index 0000000..426fa70 --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/fep_base.py @@ -0,0 +1,211 @@ +from pydantic import BaseModel +from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase +import numpy as np +from scipy.sparse import csr_matrix +from scipy.sparse.csgraph import shortest_path +from icolos.utils.enums.step_enums import StepFepPlusEnum +from typing import List +import time +import os +from icolos.core.workflow_steps.step import _LE + +_SFE = StepFepPlusEnum() + + +class StepFEPBase(StepSchrodingerBase, BaseModel): + """ + Base class containing common functionality for Schrodinger FEP+ workflows + """ + + def __init__(self, **data): + super().__init__(**data) + + def _parse_output(self, tmp_dir): + # pick up the final annotated map construction + self.data.generic.clear_file_dict() + self._logger.log(f"Reading output map.", _LE.INFO) + data = None + counts = 0 + # hold whilst the job data gets written to local fs + while data is None and counts < 50000: + try: + path = [ + file + for file in os.listdir(tmp_dir) + if file.endswith(_SFE.FMP_OUTPUT_FILE) + ] + assert len(path) == 1 + path = path[0] + with open(os.path.join(tmp_dir, path), "rb") as f: + data = f.read() + except AssertionError: + self._logger.log( + "Output file has not yet appeared in the file system, sleeping and retrying...", + _LE.INFO, + ) + time.sleep(15) + counts += 1 + + self._add_data_to_generic(path, data) + + def _extract_log_file_data(self, tmp_dir): + """ + Parses FEP log file to extract edge and node properties + """ + lines = None + counts = 0 + # wait whilst job sits in the queue + while lines is None and counts < 50000: + try: + log_file = [ + file for file in os.listdir(tmp_dir) if file.endswith(_SFE.LOGFILE) + ] + assert len(log_file) == 1 + log_file = log_file[0] + + with open(os.path.join(tmp_dir, log_file), "r") as f: + lines = f.readlines() + + edge_header_index = [ + idx for idx, s in enumerate(lines) if _SFE.EDGE_HEADER_LINE in s + ][-1] + node_header_index = [ + idx for idx, s in enumerate(lines) if _SFE.NODE_HEADER_LINE in s + ][-1] + end_of_data_index = [ + idx for idx, s in enumerate(lines) if _SFE.DATA_TERMINUS in s + ][0] + + edge_data_lines = [ + line + for line in lines[edge_header_index + 3 : node_header_index - 1] + ] + node_data_lines = [ + line + for line in lines[node_header_index + 3 : end_of_data_index - 1] + ] + + 
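                # the edge and node tables sliced out above are handed on below:
+                # _process_edge_lines calibrates per-compound dG values from the
+                # edge ddGs, while _process_node_lines attaches the per-node dG
+                # tags reported directly in the log file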
+                self._process_edge_lines(edge_data_lines)
+                self._process_node_lines(node_data_lines)
+
+            except AssertionError:
+                self._logger.log(
+                    "Log file has not yet appeared in the file system, sleeping and retrying...",
+                    _LE.INFO,
+                )
+                time.sleep(15)
+                counts += 1
+
+    def _process_node_lines(self, data: List[str]) -> None:
+        for entry in data:
+            fields = entry.split()
+            idx = fields[1]
+            dG = fields[2]
+            # attach dG tags to the compound objects, if present
+            if self.data.compounds:
+                # account for running this step compoundless
+                self.data.compounds[int(idx[0])].get_enumerations()[0].get_conformers()[
+                    0
+                ].get_molecule().SetProp("dG", str(dG))
+            self._logger.log(
+                f"dG directly from the output file for compound {idx} is {dG}",
+                _LE.INFO,
+            )
+
+    def _process_edge_lines(self, edge_data: List[str]) -> None:
+        """
+        Calibrate dG values using a reference compound and the edge ddG values from the log file output, returning a dG value for each compound
+        """
+
+        # calculate the max ligand index; ligands may have been skipped in previous steps, so we cannot rely on self.get_compounds()
+        len_nodes = 0
+        for line in edge_data:
+            parts = line.split()
+
+            lig_from = int(parts[1].split(":")[0])
+            lig_to = int(parts[3].split(":")[0])
+            for idx in [lig_from, lig_to]:
+                if idx > len_nodes:
+                    len_nodes = idx
+        len_nodes += 1  # account for zero-indexed ligands
+
+        error_matrix = np.zeros((len_nodes, len_nodes))
+        ddG_matrix = np.zeros((len_nodes, len_nodes))
+        for line in edge_data:
+            parts = line.split()
+            try:
+                # parse the compound info from the log file
+                lig_from = int(parts[1].split(":")[0])
+                lig_to = int(parts[3].split(":")[0])
+                ddG = float(parts[4].split("+-")[0])
+                err = float(parts[4].split("+-")[1])
+            except ValueError:
+                self._logger.log(
+                    f"Line: {line} from the logfile contained an unexpected datatype - cannot process this edge - skipping",
+                    _LE.WARNING,
+                )
+                continue
+
+            error_matrix[lig_from, lig_to] = err
+            error_matrix[lig_to, lig_from] = err
+            ddG_matrix[lig_from, lig_to] = ddG
+            ddG_matrix[lig_to, lig_from] = -ddG
+        error_matrix = csr_matrix(error_matrix)
+        # compute the lowest-error paths from every ligand back to the anchor (the reference ligand, node 0)
+        _, predecessors = shortest_path(
+            error_matrix, directed=False, return_predecessors=True, indices=0
+        )
+        self._construct_dg_per_compound(ddG_matrix, predecessors, error_matrix)
+
+    def _construct_dg_per_compound(
+        self, ddG: np.ndarray, predecessors: List, error_matrix: np.ndarray
+    ) -> None:
+        """
+        Calculate the calibrated binding free energy per compound using a reference value
+        and attach the calculated dG to the compounds
+        """
+        try:
+            ref_dG = self.settings.additional[_SFE.REFERENCE_DG]
+        except KeyError:
+            self._logger.log(
+                "Expected to find a reference dG value for the lead compound, but none was found."
+ "Defaulting to 0.00, you will need to apply a manual correction afterwards", + _LE.WARNING, + ) + ref_dG = 0.00 + + def _calculate_dg(comp_num: int, dG=ref_dG, err=0): + prev_index = predecessors[comp_num] + dG += ddG[prev_index, comp_num] + err += error_matrix[prev_index, comp_num] + if prev_index != 0: + _calculate_dg(prev_index, dG=dG, err=err) + else: + data = str(round(dG, 2)) + "+-" + str(round(err, 2)) + self.data.compounds[idx].get_enumerations()[0].get_conformers()[ + 0 + ].get_molecule().SetProp("map_dG", data) + self._logger.log( + f"Calculated dG from spanning tree for compound {idx} is {data}", + _LE.INFO, + ) + + for comp in self.get_compounds(): + idx = comp.get_compound_number() + # check whether the compound appeared in the final map + try: + + if idx == 0: + comp.get_enumerations()[0].get_conformers()[ + 0 + ].get_molecule().SetProp( + "map_dG", str(self.settings.additional[_SFE.REFERENCE_DG]) + ) + if idx != 0: # skip the reference compound + _calculate_dg(idx) + except IndexError: + self._logger.log( + f"Compound {idx} was not found in the output map, it was likely dropped during the workflow", + _LE.WARNING, + ) + continue diff --git a/icolos/core/workflow_steps/schrodinger/fep_plus_execution.py b/icolos/core/workflow_steps/schrodinger/fep_plus_execution.py new file mode 100644 index 0000000..5e4af72 --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/fep_plus_execution.py @@ -0,0 +1,192 @@ +from copy import deepcopy +from typing import List + +from icolos.utils.enums.step_enums import StepBaseEnum, StepFepPlusEnum +from icolos.utils.enums.program_parameters import FepPlusEnum +from icolos.utils.execute_external.fep_plus import FepPlusExecutor + +from pydantic import BaseModel, PrivateAttr +import os +from icolos.core.workflow_steps.step import _LE +import time +from icolos.core.workflow_steps.schrodinger.fep_base import StepFEPBase + +from icolos.utils.general.icolos_exceptions import StepFailed + +_FE = FepPlusEnum() +_SFE = StepFepPlusEnum() +_SBE = StepBaseEnum + + +class StepFepPlusExec(StepFEPBase, BaseModel): + """ + Execute the FEP+ workflow, interfaced with AWS + """ + + class Config: + underscore_attrs_are_private = True + + _job_id = PrivateAttr() + + def __init__(self, **data): + super().__init__(**data) + + self._initialize_backend(executor=FepPlusExecutor) + self._check_backend_availability() + + self._job_id = None + + def _parse_arguments(self): + parameters = deepcopy(self.settings.arguments.parameters) + arguments = [] + if len(self.settings.arguments.flags) > 0: + for flag in self.settings.arguments.flags: + arguments.append(str(flag)) + if parameters: + for key in parameters.keys(): + arguments.append(key) + if parameters[key] is not None and parameters[key] != "": + arguments.append(str(parameters[key])) + # for our AWS config, need to set processors per job =1 + if "-ppj" not in arguments: + arguments.extend(["-ppj", "1"]) + self._logger.log( + "Set -ppj 1 for AWS execution, since no override was specified", + _LE.DEBUG, + ) + if _SFE.RETRIES not in arguments: + arguments.extend([_SFE.RETRIES, "3"]) + arguments.append(_SFE.FMP_OUTPUT_FILE) + + # remove "-WAIT" if it has been set, as this will interfere with the implementation (and might cause issues + # due to file system write buffering) + if _SFE.WAIT_FLAG in arguments: + self._logger.log( + "Ignoring -WAIT flag for FEP+ execution (this would interfere with the implementation).", + _LE.WARNING, + ) + arguments = [arg for arg in arguments if arg != _SFE.WAIT_FLAG] + return arguments + + 
+    def _unit_test_simulate_output(self, fmp_data, log_data):
+        # call this method from the unit tests instead of the execute method to write out the expected output
+        tmp_dir = self._make_tmpdir()
+        with open(
+            os.path.join(
+                tmp_dir,
+                f"{self.settings.arguments.parameters[_SFE.JOBNAME_FLAG]}_{_SFE.FMP_OUTPUT_FILE}",
+            ),
+            "w",
+        ) as f:
+            f.write(fmp_data)
+        with open(
+            os.path.join(
+                tmp_dir,
+                f"{self.settings.arguments.parameters[_SFE.JOBNAME_FLAG]}_{_SFE.LOGFILE}",
+            ),
+            "w",
+        ) as f:
+            f.write(log_data)
+        self._parse_output(tmp_dir)
+        self._extract_log_file_data(tmp_dir)
+        self._remove_temporary(tmp_dir)
+
+    def _get_job_id(self, result):
+        parts = str(result.stdout).split("\n")
+        for part in parts:
+            if _SFE.JOBID_STRING in part:
+                # full_job_id looks something like 549a938d-d2ca-11eb-b9f2-0a6713e9bd3a, but only the first part of the
+                # hash is needed to access the right job afterwards
+                full_job_id = part.split(" ")[1]
+                self._job_id = full_job_id.split("-")[0]
+                self._logger.log(f"JobId of FEP+ run is {self._job_id}.", _LE.DEBUG)
+        if self._job_id is None:
+            self._logger.log(
+                "Could not obtain JobId after execution - abort.", _LE.ERROR
+            )
+            raise StepFailed
+
+    def _get_log_file(self) -> List[str]:
+        arguments = [
+            self._job_id,
+            _SFE.FILE_NAME,
+            f'{self.settings.arguments.parameters[_SFE.JOBNAME_FLAG]}_{_SFE.LOGFILE}"',
+        ]
+        logging_result = None
+        trials = 0
+        while trials < 30000:
+            logging_result = self._backend_executor.execute(
+                command=_FE.JSC_TAIL_FILE, arguments=arguments, check=False
+            )
+            if logging_result.returncode == 1:
+                time.sleep(30)
+                trials += 1
+                continue
+            elif logging_result.returncode == 0:
+                break
+        if logging_result is None:
+            raise StepFailed("Could not obtain log file from server within time limit.")
+        log_lines = str(logging_result.stdout).split("\n")
+        return log_lines
+
+    def _get_new_lines(self, old_file) -> List[str]:
+        new_lines = self._get_log_file()
+        # drop the first n lines of the new log file, where n is the length of the old log file
+        diff = new_lines[len(old_file) - 1 :]
+        return diff
+
+    def _wait_for_job_completion(self):
+        # get the log file at this stage
+        log_file = self._get_log_file()
+        for line in log_file:
+            self._logger_blank.log(line, _LE.INFO)
+        # TODO: set maximum (or at least allow to set a maximum)
+        while (_SFE.FEP_EXEC_COMPLETE not in log_file) and (
+            _SFE.FEP_EXEC_PARTIAL_COMPLETE not in log_file
+        ):
+            time.sleep(30)
+            new_lines = self._get_new_lines(log_file)
+            if len(new_lines) > 0:
+                for line in new_lines:
+                    self._logger_blank.log(line, _LE.INFO)
+                    log_file.append(line)
+
+    def _clean_up(self, tmp_dir: str):
+        self._remove_temporary(tmp_dir)
+        self._job_id = None
+
+    def execute(self):
+        # generate the temporary directory and populate it with the required files
+        tmp_dir = self._make_tmpdir()
+        self.data.generic.write_out_all_files(tmp_dir)
+
+        # check that the compounds were loaded properly
+        if not self.data.compounds:
+            self._logger.log(
+                f"No compounds were loaded for step {self.step_id}! "
+                f"If this was intentional you can ignore this warning.",
+                _LE.WARNING,
+            )
+
+        # obtain the arguments as a list of strings
+        arguments = self._parse_arguments()
+        self._logger.log(f"Executing FEP+ calculation in {tmp_dir}.", _LE.INFO)
+
+        # execute fep_plus
+        self._apply_token_guard()
+        result = self._backend_executor.execute(
+            command=_FE.FEP_EXECUTOR, arguments=arguments, location=tmp_dir, check=True
+        )
+
+        # get the job ID from the job server
+        self._get_job_id(result)
+
+        # wait for job completion
+        self._wait_for_job_completion()
+
+        # extract the edge information from the log file (rather than from the annotated map, as this is easier)
+        self._parse_output(tmp_dir)
+        self._extract_log_file_data(tmp_dir)
+        self._logger.log("Completed FEP+ execution.", _LE.INFO)
+
+        # clean-up and reset
+        self._clean_up(tmp_dir)
diff --git a/icolos/core/workflow_steps/schrodinger/fep_plus_setup.py b/icolos/core/workflow_steps/schrodinger/fep_plus_setup.py
new file mode 100644
index 0000000..4e5899e
--- /dev/null
+++ b/icolos/core/workflow_steps/schrodinger/fep_plus_setup.py
@@ -0,0 +1,303 @@
+from typing import List
+from icolos.core.containers.generic import GenericData
+from icolos.core.step_utils.structconvert import StructConvert
+from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase
+from icolos.utils.enums.program_parameters import (
+    FepPlusEnum,
+    SchrodingerExecutablesEnum,
+)
+from icolos.utils.enums.step_enums import StepBaseEnum, StepFepPlusEnum, StepGlideEnum
+from icolos.utils.execute_external.fep_plus import FepPlusExecutor
+from rdkit.Chem import SDMolSupplier
+from icolos.utils.execute_external.schrodinger import SchrodingerExecutor
+from icolos.core.workflow_steps.step import _LE
+import os
+from pydantic import BaseModel
+from rdkit.Chem import SDWriter
+
+_SFE = StepFepPlusEnum()
+_FE = FepPlusEnum()
+_SEE = SchrodingerExecutablesEnum()
+_SBE = StepBaseEnum
+_SGE = StepGlideEnum()
+
+
+class StepFepPlusSetup(StepSchrodingerBase, BaseModel):
+    """
+    Construct and analyse a perturbation map for a set of congeneric ligands.
+    Supports extracting structures from poseviewer or PDB files.
+    """
+
+    _schrodinger_executor: SchrodingerExecutor = None
+    _converter: StructConvert = None
+
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        self._initialize_backend(executor=FepPlusExecutor)
+        self._check_backend_availability()
+
+        self._schrodinger_executor = SchrodingerExecutor(
+            prefix_execution=self.execution.prefix_execution,
+            binary_location=self.execution.binary_location,
+        )
+        self._converter = StructConvert(
+            prefix_execution=self.execution.prefix_execution,
+            binary_location=self.execution.binary_location,
+        )
+
+    def _extract_receptor_from_pv(self, tmp_dir, input_file: str = _SFE.RECEPTOR_MAEGZ):
+        # run split_structure.py to obtain the receptor structure
+        self._logger.log("Extracting receptor from structure.", _LE.INFO)
+        self._schrodinger_executor.execute(
+            command=_SEE.STRUCT_SPLIT,
+            arguments=[
+                "-m",
+                "pdb",
+                "-many_files",
+                os.path.join(tmp_dir, input_file),
+                f"{_SFE.STRUCT_SPLIT_BASE}.mae",
+            ],
+            check=True,
+            location=tmp_dir,
+        )
+
+        # get rid of the original receptor structure now that we have the new one
+        os.remove(os.path.join(tmp_dir, _SFE.RECEPTOR_MAEGZ))
+
+    def _write_receptor_from_pv(self, tmp_dir):
+        # handles writing the receptor structure to the tmpdir, either from a poseviewer file or a provided receptor:
+        # take the first poseviewer file that can be found, split the structure and keep only the receptor
+        for compound in self.get_compounds():
+            for enumeration in compound.get_enumerations():
+                for conformer in enumeration.get_conformers():
+                    if (
+                        _SGE.GLIDE_POSEVIEWER_FILE_KEY
+                        in conformer.get_extra_data().keys()
+                    ):
+                        with open(
+                            os.path.join(tmp_dir, _SFE.RECEPTOR_MAEGZ), "wb"
+                        ) as f:
+                            f.write(
+                                conformer.get_extra_data()[
+                                    _SGE.GLIDE_POSEVIEWER_FILE_KEY
+                                ]
+                            )
+                        break
+        if _SFE.RECEPTOR_MAEGZ in os.listdir(tmp_dir):
+            self._logger.log(
+                "Writing poseviewer file to temporary directory.", _LE.INFO
+            )
+            self._extract_receptor_from_pv(tmp_dir)
+        elif self.data.generic.get_files_by_extension("pdb"):
+            # a pdb file was loaded to generic data, use this as the receptor structure
+            self.data.generic.get_argument_by_extension(
+                "pdb", rtn_file_object=True
+            ).write(os.path.join(tmp_dir, "receptor.pdb"), join=False)
+
+            self._logger.log(
+                "Converting provided pdb receptor structure to mae", _LE.DEBUG
+            )
+            self._converter.convert(
+                os.path.join(tmp_dir, "receptor.pdb"),
+                os.path.join(tmp_dir, f"{_SFE.STRUCT_SPLIT_BASE}_receptor1.mae"),
+            )
+            os.remove(os.path.join(tmp_dir, "receptor.pdb"))
+
+        else:
+            self._logger.log(
+                "No poseviewer file was found attached to any of the conformers, and no PDB receptor file was specified - this must be set in the docking step",
+                _LE.ERROR,
+            )
+            raise FileNotFoundError
+
+    def _check_xray_structure(self, compound_number):
+        # check whether an xray structure has been provided for that compound
+        if _SFE.XRAY_STRUCTURES in self.settings.additional.keys():
+            if isinstance(self.settings.additional[_SFE.XRAY_STRUCTURES], dict):
+                if (
+                    compound_number
+                    in self.settings.additional[_SFE.XRAY_STRUCTURES].keys()
+                ):
+                    return True, _FE.DICT
+            elif os.path.isdir(self.settings.additional[_SFE.XRAY_STRUCTURES]):
+                if os.path.isfile(
+                    os.path.join(
+                        self.settings.additional[_SFE.XRAY_STRUCTURES],
+                        f"{compound_number}.pdb",
+                    )
+                ):
+                    return True, _FE.PATH
+        return False, None
+
+    def _rename_sdf(self, path, comp_num):
+        with open(path, "r") as f:
+            lines = f.readlines()[1:]
+        new_lines = [f"{comp_num}:0:0\n"]
+        for line in lines:
+            new_lines.append(line)
+        self._remove_temporary(path)
+        with open(path, "w") as f:
+            f.writelines(new_lines)
+
+    def _extract_ligand_from_pdb(self, tmp_dir: str, comp_num: int, type: str):
+        # if ligand poses have been provided from xray structures, extract just the ligand
+        self._logger.log(
+            f"Extracting ligand from provided Xray structure for compound {comp_num}",
+            _LE.DEBUG,
+        )
+        if type == _FE.DICT:
+            file_path = self.settings.additional[_SFE.XRAY_STRUCTURES][comp_num]
+        else:
+            file_path = os.path.join(
+                self.settings.additional[_SFE.XRAY_STRUCTURES], f"{comp_num}.pdb"
+            )
+        if not os.path.isfile(file_path):
+            raise FileNotFoundError(
+                "The provided path to the xray structure does not exist or is not accessible"
+            )
+        self._schrodinger_executor.execute(
+            command=_SEE.STRUCT_SPLIT,
+            arguments=["-m", "pdb", "-many_files", file_path, f"{_SFE.XRAY_SPLIT}.sdf"],
+            check=True,
+            location=tmp_dir,
+        )
+        # remove everything apart from the ligand sdf, which is concatenated later
+        lig_found = False
+        for file in os.listdir(tmp_dir):
+            idx = file.split("/")[-1]
+            if idx.startswith(_SFE.XRAY_SPLIT):
+                if "ligand" in idx:
+                    # need to modify the name from the standard that Schrodinger provides
+                    self._rename_sdf(os.path.join(tmp_dir, file), comp_num)
+                    mols = SDMolSupplier(os.path.join(tmp_dir, file))
+                    data = mols[0]
+                    lig_found = True
+                    self._remove_temporary(os.path.join(tmp_dir, file))
+                else:
+                    self._remove_temporary(os.path.join(tmp_dir, file))
+        if lig_found:
+            return data
+
+    def _write_input_files(self, tmp_dir):
+        # write the receptor structure to the tmpdir, either from a poseviewer or a provided pdb file
+        self._write_receptor_from_pv(tmp_dir)
+
+        # write out all conformers present in self.data.compounds to a single sdf file
+        writer = SDWriter(os.path.join(tmp_dir, "concatenated.sdf"))
+        for compound in self.get_compounds():
+            # if an xray pose is provided, use this
+            flag, type = self._check_xray_structure(compound.get_compound_number())
+            if flag is True:
+                self._logger.log(
+                    "Found Xray structure for the ligand - using this in preference to a docking pose",
+                    _LE.DEBUG,
+                )
+                mol = self._extract_ligand_from_pdb(
+                    tmp_dir, compound.get_compound_number(), type
+                )
+                writer.write(mol)
+            else:
+                # use the docked conformer
+                for enumeration in compound.get_enumerations():
+                    for conformer in enumeration.get_conformers():
+                        mol = conformer.get_molecule()
+                        writer.write(mol)
+        # close the writer to make sure all molecules are flushed to disk
+        writer.close()
+
+    def _parse_arguments(self, io_dict: dict) -> List[str]:
+        arguments = []
+        for key in self.settings.arguments.parameters.keys():
+            arguments.append(key)
+            arguments.append(str(self.settings.arguments.parameters[key]))
+        for flag in self.settings.arguments.flags:
+            arguments.append(str(flag))
+        for key, value in io_dict.items():
+            arguments.append(key)
+            arguments.append(value)
+        return arguments
+
+    def _get_structcat_args(
+        self, tmp_dir: str, out_file_type: str, outfile: str
+    ) -> List[str]:
+        arguments = [
+            f"{_SEE.STRUCTCAT_I}mae",
+            os.path.join(tmp_dir, f"{_SFE.STRUCT_SPLIT_BASE}_receptor1.mae"),
+            f"{_SEE.STRUCTCAT_I}sd",
+        ]
+
+        for file in os.listdir(tmp_dir):
+            if file.endswith("sdf"):
+                arguments.append(os.path.join(tmp_dir, file))
+        arguments.append(f"{_SEE.STRUCTCAT_O}{out_file_type}")
+        arguments.append(os.path.join(tmp_dir, outfile))
+        return arguments
+
+    def _concatenate_pv_files(self, tmp_dir: str):
+        # create a poseviewer-formatted file with the receptor structure, then the docked ligand poses
+        arguments = self._get_structcat_args(
+            tmp_dir=tmp_dir, out_file_type="mae", outfile=_SFE.STRUCTCAT_MAEGZ_OUTFILE
+        )
+        self._schrodinger_executor.execute(
+            command=_SEE.STRUCTCAT, arguments=arguments, check=True
+        )
+
+    def _analyse_map(self, tmp_dir):
+        """Run the fmp_stats program to analyse the map, i.e. generate node similarities etc."""
+        result = self._schrodinger_executor.execute(
+            command=_SEE.FMP_STATS,
+            arguments=["out.fmp", "-f"],
+            check=True,
+            location=tmp_dir,
+        )
+        log_lines = []
+        for line in str(result.stdout).split("\n"):
+            self._logger_blank.log(line, _LE.INFO)
+            log_lines.append(line + "\n")
+
+        self.data.generic.add_file(
+            GenericData(file_name="fep_mapper.log", file_data=log_lines)
+        )
+
+    def _parse_output(self, tmp_dir: str):
+        # retrieve the edge and fmp files produced by the mapper step and attach them to the generic dict
+        files = [
+            os.path.join(tmp_dir, f)
+            for f in os.listdir(tmp_dir)
+            if f.endswith(("fmp", "edge", "log"))
+        ]
+
+        for file in files:
+            try:
+                with open(file, "r") as f:
+                    data = f.read()
+            except UnicodeDecodeError:
+                with open(file, "rb") as f:
+                    data = f.read()
+            self._add_data_to_generic(file, data)
+
+    def execute(self):
+        # run the job in a temporary directory
+        tmp_dir = self._make_tmpdir()
+
+        self._write_input_files(tmp_dir)
+        self._concatenate_pv_files(tmp_dir)
+        io_dict = {
+            "": os.path.join(tmp_dir, _SFE.STRUCTCAT_MAEGZ_OUTFILE),
+            "-o": _SFE.FEP_MAPPER_OUTPUT,
+        }
+        arguments = self._parse_arguments(io_dict=io_dict)
+        self._apply_token_guard()  # wait for sufficient license tokens before launching, if a guard is configured
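+        # fep_mapper consumes the concatenated poseviewer file (the "" key in
+        # io_dict above) and writes the perturbation map (out.fmp), the edge
+        # list and a log, which _analyse_map and _parse_output pick up below
+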
self._logger.log("Optimising perturbation map", _LE.DEBUG) + self._backend_executor.execute( + command=_FE.FEP_MAPPER, arguments=arguments, check=True, location=tmp_dir + ) + assert os.path.isfile(os.path.join(tmp_dir, "out.fmp")) + self._logger.log( + f"Successfully executed fep_mapper in directory {tmp_dir}.", _LE.DEBUG + ) + + self._logger.log("Analysing the perturbation map.", _LE.DEBUG) + self._analyse_map(tmp_dir) + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/schrodinger/glide.py b/icolos/core/workflow_steps/schrodinger/glide.py new file mode 100644 index 0000000..af2cc1d --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/glide.py @@ -0,0 +1,635 @@ +import gzip +import os +import shutil +import tempfile +from copy import deepcopy +from typing import List, Tuple + +from pydantic import BaseModel +from rdkit import Chem + +from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase +from icolos.utils.execute_external.glide import GlideExecutor +from icolos.utils.execute_external.schrodinger import SchrodingerExecutor +from icolos.utils.general.files_paths import any_in_file, gen_tmp_file + +from icolos.core.containers.compound import Conformer + +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum, GlideEnum +from icolos.utils.enums.step_enums import StepGlideEnum, StepBaseEnum +from icolos.core.workflow_steps.step import _LE +from icolos.utils.general.parallelization import SubtaskContainer, Parallelizer, Subtask +from icolos.utils.general.strings import stringify + + +class GlideSupportEnum: + + GLIDE_INPUTBLOCK_COMMASEPARATED = [ + "CONSTRAINT_GROUP" + ] # define list of block keys which are to have commas + GLIDE_INPUTBLOCK_VALUEQUOTED = [ + "FEATURE" + ] # define list of block keys, where values are to be put + # into double quotation marks + + GLIDE_TG_WAIT_INTERVAL = "wait_interval_seconds" + GLIDE_TG_WAIT_LIMIT = "wait_limit_seconds" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +_SBE = StepBaseEnum +_EE = GlideEnum() +_SGE = StepGlideEnum() +_SEE = SchrodingerExecutablesEnum() +_GSE = GlideSupportEnum() + + +class StepGlide(StepSchrodingerBase, BaseModel): + + _schrodinger_executor: SchrodingerExecutor = None + + class Config: + underscore_attrs_are_private = True + + def __init__(self, **data): + super().__init__(**data) + + # initialize the executors and test availability + self._initialize_backend(executor=GlideExecutor) + self._check_backend_availability() + + self._schrodinger_executor = SchrodingerExecutor( + prefix_execution=self.execution.prefix_execution, + binary_location=self.execution.binary_location, + ) + + def _get_scores_from_conformer(self, conformer: Chem.Mol) -> Tuple[float, float]: + return ( + float(conformer.GetProp(_SGE.GLIDE_DOCKING_SCORE)), + float(conformer.GetProp(_SGE.GLIDE_GSCORE)), + ) + + def _set_docking_score(self, conformer: Chem.Mol) -> bool: + try: + docking_score, g_score = self._get_scores_from_conformer(conformer) + except KeyError: + return False + conformer.SetProp(_SBE.ANNOTATION_TAG_DOCKING_SCORE, str(docking_score)) + conformer.SetProp(_SBE.ANNOTATION_TAG_G_SCORE, str(g_score)) + return True + + def _generate_temporary_input_output_files( + self, batch: List[List[Subtask]] + ) -> Tuple[List[str], List[str], 
List[str], List[str]]: + tmp_output_dirs = [] + tmp_input_mae_paths = [] + tmp_output_sdf_paths = [] + tmp_output_maegz_paths = [] + + for next_subtask_list in batch: + # generate temporary input files and output directory + cur_tmp_output_dir = tempfile.mkdtemp() + _, cur_tmp_sdf = gen_tmp_file(suffix=".sdf", dir=cur_tmp_output_dir) + _, cur_tmp_mae = gen_tmp_file(suffix=".mae", dir=cur_tmp_output_dir) + + # write-out the temporary input file + writer = Chem.SDWriter(cur_tmp_sdf) + one_written = False + for subtask in next_subtask_list: + enumeration = subtask.data + mol = deepcopy(enumeration.get_molecule()) + if mol is not None: + mol.SetProp("_Name", enumeration.get_index_string()) + one_written = True + writer.write(mol) + writer.close() + if one_written is False: + self._remove_temporary(cur_tmp_output_dir) + continue + + # translate the SDF into a MAE file + self._translate_SDF_to_MAE( + sdf_path=cur_tmp_sdf, + mae_path=cur_tmp_mae, + executor=self._schrodinger_executor, + ) + + # add the path to which "_dock_subjob()" will write the result SDF + _, output_sdf_path = gen_tmp_file( + suffix="_result.sdf", dir=cur_tmp_output_dir + ) + _, output_maegz_path = gen_tmp_file( + suffix="_result.maegz", dir=cur_tmp_output_dir, text=False + ) + tmp_output_sdf_paths.append(output_sdf_path) + tmp_output_maegz_paths.append(output_maegz_path) + tmp_input_mae_paths.append(cur_tmp_mae) + tmp_output_dirs.append(cur_tmp_output_dir) + return ( + tmp_output_dirs, + tmp_input_mae_paths, + tmp_output_sdf_paths, + tmp_output_maegz_paths, + ) + + def _all_keywords(self) -> dict: + """Returns joined keywords from JSON and from .in file (if specified).""" + + keywords = {} + + # keywords from maestro file; they can be overwritten by explicitly set values from the "configuration" block + maestro_in_file = deepcopy( + self.settings.additional.get(_SGE.MAESTRO_IN_FILE, None) + ) + if maestro_in_file is not None: + with open(maestro_in_file[_SGE.MAESTRO_IN_FILE_PATH], "rt") as f: + keywords_from_file = self._parse_maestro_in_file(f.readlines()) + keywords.update(keywords_from_file) + + # Add keywords from advanced_glide_keywords + # (they are keywords with file paths), + # skipping keywords that are None. + # Also skip maestro file - that's not a keyword. + # TODO: This is legacy code from DockStream's implementation, which was necessary to accommodate the GUI. + # Remove? + # if self.parameters.advanced_glide_keywords is not None: + # adv_kw = stringify({ + # k: v + # for k, v in self.parameters.advanced_glide_keywords.dict().items() + # if v is not None and k not in {'maestro_file'} + # }) + # keywords.update(adv_kw) + + # Add "ordinary" keywords, overwriting existing ones. + json_keywords = stringify( + deepcopy(self.settings.additional.get(_SGE.CONFIGURATION, {})) + ) + keywords.update( + json_keywords + ) # Overwrites any keywords that are already present. + return keywords + + def _configuration_Maestro_reformat(self, configuration: dict): + # rewrite keyword input file in Maestro format + maestro_indent = " " + maestro_spacing = " " + + element_lines = [] + block_lines = [] + + for key in configuration.keys(): + if isinstance(configuration[key], str): + # keyword holds one dictionary (string) only + element_lines.append( + maestro_spacing.join([key, configuration[key] + "\n"]) + ) + elif isinstance(configuration[key], dict): + # keyword holds a composite block and has no dictionary (e.g. 
constraints); note, that these must
+                # always be at the end of the file
+                block_lines.append("\n" + key + "\n")
+                block = configuration[key]
+                for key_idx, block_key in enumerate(block.keys()):
+                    block_value = block[block_key]
+
+                    # if this is a value in certain blocks, put it into double quotation marks, as spaces are present
+                    if any([x in key for x in _GSE.GLIDE_INPUTBLOCK_VALUEQUOTED]):
+                        block_value = '"' + block_value + '"'
+                    line = maestro_indent + maestro_spacing.join(
+                        [block_key, block_value]
+                    )
+
+                    # add a comma to the block definition, if there are more lines to come and the block requires it;
+                    # note, that not all blocks in GLIDE require this; in some cases, the comma is already part of
+                    # the line (then skip it!)
+                    if any([x in key for x in _GSE.GLIDE_INPUTBLOCK_COMMASEPARATED]):
+                        if (key_idx + 1) < len(block) and line[-1] != ",":
+                            line = line + ","
+
+                    block_lines.append(line + "\n")
+            else:
+                raise Exception(
+                    f"Cannot handle type {type(configuration[key])} in configuration file specification, only use strings and blocks."
+                )
+
+        return element_lines, block_lines
+
+    def _write_configuration_to_file(self, configuration: dict, path: str):
+        """Function to generate a keyword input file in Maestro format."""
+
+        # call a function that returns the input keywords in Maestro format
+        element_lines, block_lines = self._configuration_Maestro_reformat(
+            configuration=configuration
+        )
+
+        # arrange the elements and blocks
+        if path is None:
+            _, path = gen_tmp_file(suffix=".in")
+        with open(path, mode="w") as f:
+            self._logger.log(f"Writing GLIDE input file {path}:\n", _LE.DEBUG)
+            for line in element_lines:
+                f.write(line)
+                self._logger_blank.log(line.rstrip("\n"), _LE.DEBUG)
+            for line in block_lines:
+                f.write(line)
+                self._logger_blank.log(line.rstrip("\n"), _LE.DEBUG)
+            self._logger_blank.log("", _LE.DEBUG)
+            self._logger.log("--- End file", _LE.DEBUG)
+
+    def _get_time_limit_per_task(self):
+        # for the "SP" method, it can be expected that about 90 s / ligand is required at most;
+        # use a bit extra
+        return int(self.settings.additional.get(_SGE.TIME_LIMIT_PER_TASK, 120))
+
+    def _get_path_tmp_results(
+        self, glide_pose_outtype: str, base_path: str
+    ) -> Tuple[str, str]:
+        if glide_pose_outtype == _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB:
+            path_tmp_results = os.path.join(
+                os.path.dirname(base_path),
+                "".join(
+                    [
+                        os.path.splitext(os.path.basename(base_path))[0],
+                        _SGE.GLIDE_SDF_DEFAULT_EXTENSION,
+                    ]
+                ),
+            )
+        elif glide_pose_outtype == _EE.GLIDE_POSE_OUTTYPE_POSEVIEWER:
+            path_tmp_results = os.path.join(
+                os.path.dirname(base_path),
+                "".join(
+                    [
+                        os.path.splitext(os.path.basename(base_path))[0],
+                        _SGE.GLIDE_MAEGZ_DEFAULT_EXTENSION,
+                    ]
+                ),
+            )
+        else:
+            raise NotImplementedError(
+                f"Specified out-type {glide_pose_outtype} for Glide not supported."
+            )
+
+        path_tmp_log = os.path.join(
+            os.path.dirname(base_path),
+            "".join([os.path.splitext(os.path.basename(base_path))[0], _SGE.GLIDE_LOG]),
+        )
+        return path_tmp_results, path_tmp_log
+
+    def _move_result_files(
+        self,
+        glide_pose_outtype: str,
+        path_tmp_results: str,
+        path_sdf_results: str,
+        path_maegz_results: str,
+    ):
+        if glide_pose_outtype == _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB:
+            if os.path.isfile(path_tmp_results):
+                with gzip.open(path_tmp_results, "rb") as fin:
+                    with open(path_sdf_results, "wb") as fout:
+                        shutil.copyfileobj(fin, fout)
+        elif glide_pose_outtype == _EE.GLIDE_POSE_OUTTYPE_POSEVIEWER:
+            # as the output is in MAEGZ format, we need to translate it into an SDF (and move the original file to the
+            # expected path)
+            self._translate_MAE_to_SDF(
+                mae_path=path_tmp_results,
+                sdf_path=path_sdf_results,
+                executor=self._schrodinger_executor,
+            )
+            os.rename(path_tmp_results, path_maegz_results)
+        else:
+            raise NotImplementedError(
+                f"Specified out-type {glide_pose_outtype} for Glide not supported."
+            )
+
+    def _run_subjob(
+        self,
+        mae_ligand_path,
+        path_sdf_results,
+        path_maegz_results,
+        tmp_output_dir,
+        grid_path,
+        sublist,
+    ):
+        # 1) increase the sublist "tries" and set the status to "failed"
+        _ = [task.increment_tries() for task in sublist]
+        _ = [task.set_status_failed() for task in sublist]
+
+        # 2) change to the directory, to be able to use relative paths (to compensate for a Schrodinger bug with AWS)
+        working_dir = os.getcwd()
+        os.chdir(tmp_output_dir)
+
+        # 3) get the "keywords" dictionary and overwrite the necessary values;
+        # add the "LIGANDFILE" keyword to the list of keywords: the full path to the "mae" formatted ligands
+        configuration = self._all_keywords()
+        if configuration is None:
+            raise ValueError(
+                "You need to specify at least the gridfile path in the configuration for Glide."
+            )
+        configuration[_EE.GLIDE_LIGANDFILE] = mae_ligand_path
+
+        # set the path to the grid file for this run
+        configuration[_EE.GLIDE_GRIDFILE] = grid_path
+
+        # if not set, set the ligand pose outtype to "LIGANDLIB" (SDF output without receptor)
+        glide_pose_outtype = configuration.get(
+            _EE.GLIDE_POSE_OUTTYPE, _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB
+        )
+        configuration[_EE.GLIDE_POSE_OUTTYPE] = glide_pose_outtype
+
+        # 4) write the keyword-input file for the "Glide" backend out to a temporary file
+        _, glide_configuration_path = gen_tmp_file(suffix=".in", dir=tmp_output_dir)
+        self._write_configuration_to_file(
+            configuration=configuration,
+            path=glide_configuration_path,
+        )
+
+        # 5) determine the expected result and log paths (used below to poll until the job is completed)
+        # Note, that while Glide has an option "-WAIT", this does not seem to work when getting back
+        # data from AWS (probably it ends before copying back the data properly); stay with this solution for now
+        path_tmp_results, path_tmp_log = self._get_path_tmp_results(
+            glide_pose_outtype=glide_pose_outtype, base_path=glide_configuration_path
+        )
+
+        # 6) execute the "Glide" backend
+        arguments = self._prepare_glide_arguments(glide_configuration_path)
+        execution_result = self._backend_executor.execute(
+            command=_EE.GLIDE,
+            arguments=arguments,
+            check=True,
+            location=os.path.dirname(glide_configuration_path),
+        )
+
+        # 7) check the return code (anything but '0' is bad) and add "stdout" to the log file
+        time_exceeded = False
+        if execution_result.returncode != 0:
+            msg = (
+                f"Could not dock with Glide, error message: {execution_result.stdout}."
+ ) + self._logger.log(msg, _LE.ERROR) + self._print_log_file(path_tmp_log) + raise RuntimeError() + else: + if ( + self._wait_until_file_generation( + path=path_tmp_results, + path_log=path_tmp_log, + interval_sec=10, + maximum_sec=max( + self._get_time_limit_per_task() * len(sublist), 300 + ), + success_strings=_EE.GLIDE_LOG_FINISHED_STRINGS, + fail_strings=_EE.GLIDE_LOG_FAIL_STRINGS, + ) + is False + ): + time_exceeded = True + self._logger.log( + f"Sublist docking for output file {path_tmp_results} exceeded time limit or failed, " + f"all these ligands are ignored in the final write-out. This could mean that none of " + f"them could be docked or a runtime error in Glide occured.", + _LE.DEBUG, + ) + + # 6) load the log-file (if generated) and check if all went well + if ( + any_in_file(path_tmp_log, _EE.GLIDE_LOG_SUCCESS_STRING) + and time_exceeded is False + ): + self._logger.log( + f"Finished sublist (input: {mae_ligand_path}, output: {path_sdf_results}).", + _LE.DEBUG, + ) + else: + self._print_log_file(path_tmp_log) + + # 7) collect the results; Glide outputs the sdf with a given, semi-hard-coded path; extract the sdf file + self._move_result_files( + glide_pose_outtype=glide_pose_outtype, + path_tmp_results=path_tmp_results, + path_sdf_results=path_sdf_results, + path_maegz_results=path_maegz_results, + ) + + # 8) revert back to working directory + os.chdir(working_dir) + + def _prepare_glide_arguments(self, glide_configuration_path: str) -> List[str]: + # Note, that the first argument is the path to the configuration input file + # If the number of cores has been set, overwrite "N_JOBS" and parallelize internally and also note + # that each subjob requires a license; instead start each with "N_JOBS" = 1 + arguments = [glide_configuration_path] + + # copy parameters and overwrite as necessary + parameters = deepcopy(self.settings.arguments.parameters) + parameters[_EE.GLIDE_NJOBS] = 1 + + if len(self.settings.arguments.flags) > 0: + for flag in self.settings.arguments.flags: + # -WAIT leads to issues at times: The process may not return properly + # (e.g. 
because of writing problems) and then gets stuck; workaround with waiting + # for file completion, so remove it if set + if flag not in [_EE.GLIDE_WAIT]: + arguments.append(str(flag)) + if parameters: + for key in parameters.keys(): + # remove "-WAIT" if set as a parameter, as this leads to instability issues and ignore empty keys + if key == _EE.GLIDE_WAIT or key == "": + continue + arguments.append(key) + if parameters[key] is not None and parameters[key] != "": + arguments.append(str(parameters[key])) + return arguments + + def _execute_glide(self, grid_id: str, grid_path: str): + # TODO: add individual resubmission for failed subtasks + # get number of sublists in batch and initialize Parallelizer + glide_parallelizer = Parallelizer(func=self._run_subjob) + + # continue until everything is successfully done or number of retries have been exceeded + while self._subtask_container.done() is False: + next_batch = self._get_sublists(get_first_n_lists=self._get_number_cores()) + + # generate paths and initialize molecules (so that if they fail, this can be covered) + ( + tmp_output_dirs, + tmp_input_mae_paths, + tmp_output_sdf_paths, + tmp_output_maegz_paths, + ) = self._generate_temporary_input_output_files(next_batch) + + # call "token guard" method (only executed, if block is specified in the configuration), which will wait + # with the execution if not enough tokens are available at the moment + self._apply_token_guard() + + # execute the current batch in parallel; hand over lists of parameters (will be handled by Parallelizer) + # also increment the tries and set the status to "failed" (don't do that inside subprocess, as data is + # copied, not shared!) + _ = [sub.increment_tries() for element in next_batch for sub in element] + _ = [sub.set_status_failed() for element in next_batch for sub in element] + list_grid_path = [grid_path for _ in tmp_input_mae_paths] + glide_parallelizer.execute_parallel( + mae_ligand_path=tmp_input_mae_paths, + path_sdf_results=tmp_output_sdf_paths, + path_maegz_results=tmp_output_maegz_paths, + tmp_output_dir=tmp_output_dirs, + grid_path=list_grid_path, + sublist=next_batch, + ) + + # parse the output of that particular batch and remove temporary files + self._parse_glide_output( + tmp_output_sdf_paths, + tmp_output_maegz_paths, + next_batch, + grid_id, + grid_path, + ) + + # clean-up + self._remove_temporary(tmp_output_dirs) + + # print the progress for this execution + self._log_execution_progress() + + def _log_execution(self, grid_id: str, number_grids: int): + number_enumerations = 0 + number_conformers = 0 + for compound in self.get_compounds(): + number_enumerations += len(compound) + for enumeration in compound: + number_conformers += len(enumeration) + if len(enumeration) == 0: + self._logger.log( + f"Enumeration {enumeration.get_index_string()} has no docked poses attached.", + _LE.DEBUG, + ) + self._logger.log( + f"Executed Schrodinger/Glide backend for grid {grid_id} (of {number_grids}), now storing a total of {number_conformers} conformers for {number_enumerations} enumerations in {len(self.get_compounds())} compounds.", + _LE.INFO, + ) + + def _parse_glide_output( + self, + tmp_output_sdf_paths: List[str], + tmp_output_maegz_paths: List[str], + batch: List[List[Subtask]], + grid_id: str, + grid_path: str, + ): + # TODO: refactor that (recombine with ligprep parsing?) 
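The execute-until-done pattern in _execute_glide reduces to the following minimal sketch; ToyTask, pending and run_batch are illustrative stand-ins for the Subtask/SubtaskContainer/Parallelizer machinery, not the real icolos API:

# Toy version of "pull sublists until the container is done or retries are exhausted".
class ToyTask:
    def __init__(self, payload, max_tries=3):
        self.payload, self.tries, self.done, self.max_tries = payload, 0, False, max_tries

def pending(tasks):
    return [t for t in tasks if not t.done and t.tries < t.max_tries]

def run_batch(batch):
    for task in batch:
        task.tries += 1                        # increment tries before running, as above
        task.done = task.payload % 2 == 0      # pretend even payloads dock successfully

tasks = [ToyTask(i) for i in range(6)]
while pending(tasks):
    run_batch(pending(tasks)[:4])              # batch size ~ number of cores
print([(t.payload, t.done, t.tries) for t in tasks])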
+ def _update_subtask(sublist: List[Subtask], enum_identifier: str): + for task in sublist: + if task.data.get_index_string() == enum_identifier: + task.set_status_success() + + def _add_poseviewer_file(conformer: Conformer, maegz_path: str): + if os.path.isfile(maegz_path) and os.path.getsize(maegz_path) > 0: + with open(maegz_path, "rb") as f: + conformer.add_extra_data( + key=_SGE.GLIDE_POSEVIEWER_FILE_KEY, data=f.read() + ) + + for i in range(len(tmp_output_sdf_paths)): + # get input and output paths and check the files are there + path_sdf_results = tmp_output_sdf_paths[i] + path_maegz_results = tmp_output_maegz_paths[i] + cur_sublist = batch[i] + + # this is a protection against the case where empty (file size == 0 bytes) files are generated due to + # a failure during docking + if ( + not os.path.isfile(path_sdf_results) + or os.path.getsize(path_sdf_results) == 0 + ): + continue + + mol_supplier = Chem.SDMolSupplier(path_sdf_results, removeHs=False) + for mol in mol_supplier: + if mol is None: + continue + cur_enumeration_name = str(mol.GetProp("_Name")) + + # add the information on the actual grid used + mol.SetProp(_SBE.ANNOTATION_GRID_ID, str(grid_id)) + mol.SetProp(_SBE.ANNOTATION_GRID_PATH, str(grid_path)) + mol.SetProp(_SBE.ANNOTATION_GRID_FILENAME, os.path.basename(grid_path)) + + # if no docking score is attached (i.e. the molecule is a receptor or so, skip it) + if self._set_docking_score(mol) is not True: + continue + + # add molecule to the appropriate ligand + for compound in self.get_compounds(): + for enumeration in compound: + if enumeration.get_index_string() == cur_enumeration_name: + new_conformer = Conformer( + conformer=mol, + conformer_id=None, + enumeration_object=enumeration, + ) + _add_poseviewer_file( + conformer=new_conformer, maegz_path=path_maegz_results + ) + enumeration.add_conformer(new_conformer, auto_update=True) + _update_subtask( + cur_sublist, enum_identifier=cur_enumeration_name + ) + break + + def _sort_conformers(self): + # sort the conformers (best to worst) and update their names to contain the conformer id + # -> :: + for compound in self.get_compounds(): + for enumeration in compound: + enumeration.sort_conformers( + by_tag=_SGE.GLIDE_DOCKING_SCORE, reverse=False + ) + + def execute(self): + # in order to be able to efficiently execute Glide on the enumeration level, all of them have to be unrolled + # Note: As they retain their respective Compound object, the attribution later on is simple + all_enumerations = [] + for compound in self.get_compounds(): + all_enumerations = all_enumerations + compound.get_enumerations() + for enumeration in compound: + enumeration.clear_conformers() + + # to allow ensemble docking, loop over all provided grid files and annotate the origin of the conformers + gridfiles = deepcopy(self.settings.additional.get(_SGE.CONFIGURATION, None))[ + _EE.GLIDE_GRIDFILE + ] + if not isinstance(gridfiles, list): + gridfiles = [gridfiles] + + # set grid ids (generate indices, if not specified) + grid_ids = self.settings.additional.get(_SBE.GRID_IDS, []) + if len(grid_ids) != len(gridfiles): + self._logger.log( + f"There were {len(grid_ids)} grid_ids specified for {len(gridfiles)}, using indices instead.", + _LE.DEBUG, + ) + grid_ids = [str(idx) for idx in range(len(gridfiles))] + + for grid_id, grid_path in zip(grid_ids, gridfiles): + # split into sublists, according to the settings + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + 
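The attribution logic in _parse_glide_output relies on Glide echoing each enumeration's index string back through the SDF "_Name" field. A minimal, self-contained illustration of that tag round-trip with RDKit (the "docking_score" tag name here is a placeholder, not the enum value used above):

from rdkit import Chem
from rdkit.Chem import AllChem

# write a pose whose "_Name" carries the compound:enumeration index string
mol = Chem.MolFromSmiles("CCO")
AllChem.Compute2DCoords(mol)
mol.SetProp("_Name", "0:1")               # index string, as used above
mol.SetProp("docking_score", "-7.3")      # placeholder score tag
writer = Chem.SDWriter("poses.sdf")
writer.write(mol)
writer.close()

# read it back and match on the name, as the parser above does
for pose in Chem.SDMolSupplier("poses.sdf", removeHs=False):
    if pose is not None:
        print(pose.GetProp("_Name"), pose.GetProp("docking_score"))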
self._subtask_container.load_data(all_enumerations) + + # execute Glide + self._execute_glide(grid_id=grid_id, grid_path=grid_path) + + # do the logging + self._log_execution(grid_id=grid_id, number_grids=len(gridfiles)) + + # sort the conformers loaded to the enumerations + self._sort_conformers() diff --git a/icolos/core/workflow_steps/schrodinger/ligprep.py b/icolos/core/workflow_steps/schrodinger/ligprep.py new file mode 100644 index 0000000..01cf55e --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/ligprep.py @@ -0,0 +1,322 @@ +import os +import tempfile +from typing import List + +from pydantic import BaseModel +from rdkit import Chem + +from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase +from icolos.utils.execute_external.ligprep import LigprepExecutor +from icolos.utils.general.files_paths import gen_tmp_file + +from icolos.utils.general.molecules import get_charge_for_molecule +from icolos.core.containers.compound import Enumeration, Conformer, get_compound_by_id + +from icolos.utils.enums.program_parameters import ( + LigprepEnum, +) +from icolos.utils.enums.step_enums import StepLigprepEnum +from icolos.core.workflow_steps.step import _LE, _CTE +from icolos.utils.general.parallelization import SubtaskContainer, Parallelizer, Subtask +from icolos.utils.general.print_log import print_log_file +from icolos.utils.smiles import to_smiles + +_EE = LigprepEnum() +_SLE = StepLigprepEnum() + + +class StepLigprep(StepSchrodingerBase, BaseModel): + """ + Interface to the LigPrep binary for ligand embedding + """ + + class Config: + underscore_attrs_are_private = True + + def __init__(self, **data): + super().__init__(**data) + + # initialize the executor and test availability + self._initialize_backend(executor=LigprepExecutor) + self._check_backend_availability() + + def _prepare_ligprep_arguments(self) -> list: + arguments_list = [] + + # add user-specified command-line settings (if provided); note, that empty dictionaries evaluate + # to False + if len(self.settings.arguments.flags) > 0: + for flag in self.settings.arguments.flags: + arguments_list.append(str(flag)) + if self.settings.arguments.parameters: + for key in self.settings.arguments.parameters.keys(): + if key == _EE.LIGPREP_F: + self._logger.log( + 'Removing "-f" parameter for Ligprep arguments - filter file settings need to be specified in the "additional" block directly.', + _LE.WARNING, + ) + continue + + arguments_list.append(key) + if ( + self.settings.arguments.parameters[key] is not None + and self.settings.arguments.parameters[key] != "" + ): + arguments_list.append(str(self.settings.arguments.parameters[key])) + + # add default settings, that are not exposed to the user yet + if _EE.LIGPREP_HOST not in arguments_list: + arguments_list.append(_EE.LIGPREP_HOST) + arguments_list.append(_EE.LIGPREP_HOST_LOCALHOST) + arguments_list.append(_EE.LIGPREP_WAIT) + arguments_list = arguments_list + [_EE.LIGPREP_NJOBS, 1] + + return arguments_list + + def _generate_temporary_input_output_files(self, batch: List[List[Subtask]]): + tmp_output_dirs = [] + tmp_input_smi_paths = [] + tmp_input_filter_paths = [] + tmp_output_sdf_paths = [] + dict_original_smiles = {} + + for next_subtask_list in batch: + # generate temporary input files and output directory + cur_tmp_output_dir = tempfile.mkdtemp() + _, cur_tmp_smi = gen_tmp_file(suffix=".smi", dir=cur_tmp_output_dir) + _, cur_tmp_filter = gen_tmp_file(suffix=".lff", dir=cur_tmp_output_dir) + + # write smiles to temporary file as "Ligprep" backend + 
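The write-out below follows the plain ".smi" convention, one "SMILES<space>identifier" record per line, so that Ligprep can echo the identifier back in its output; sketched stand-alone (the records are made up for illustration):

# each record: "<SMILES> <index string>"; the identifier is what later
# attributes results back to their enumeration
records = {"0:0": "CCO", "0:1": "c1ccccc1"}
with open("ligands.smi", "w") as smi_file:
    for index_string, smiles in records.items():
        smi_file.write(f"{smiles} {index_string}\n")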
with open(cur_tmp_smi, "w") as f: + for subtask in next_subtask_list: + enumeration = subtask.data + dict_original_smiles[ + enumeration.get_index_string() + ] = enumeration.get_original_smile() + f.write( + enumeration.get_original_smile() + + " " + + enumeration.get_index_string() + + "\n" + ) + + # add the path to which "_run_subjob()" will write the result SDF + _, output_sdf_path = gen_tmp_file( + suffix="_result.sdf", dir=cur_tmp_output_dir + ) + + # add the temporary paths + tmp_output_dirs.append(cur_tmp_output_dir) + tmp_input_smi_paths.append(cur_tmp_smi) + tmp_input_filter_paths.append(cur_tmp_filter) + tmp_output_sdf_paths.append(output_sdf_path) + return ( + tmp_output_dirs, + tmp_input_smi_paths, + tmp_output_sdf_paths, + tmp_input_filter_paths, + dict_original_smiles, + ) + + def _execute_ligprep(self): + # TODO: add individual resubmission for failed subtasks + # get the number of sublists in the batch and initialize the Parallelizer + ligprep_parallelizer = Parallelizer(func=self._run_subjob) + + # continue until everything is successfully done or the number of retries has been exceeded + while self._subtask_container.done() is False: + next_batch = self._get_sublists(get_first_n_lists=self._get_number_cores()) + + # generate paths and initialize molecules (so that if they fail, this can be covered) + ( + tmp_output_dirs, + tmp_input_smi_paths, + tmp_output_sdf_paths, + tmp_input_filter_paths, + dict_original_smiles, + ) = self._generate_temporary_input_output_files(next_batch) + + # call the "token guard" method (only executed if the block is specified in the configuration), which will wait + # with the execution if not enough tokens are available at the moment + self._apply_token_guard() + + # execute the current batch in parallel; hand over lists of parameters (will be handled by Parallelizer) + # also increment the tries and set the status to "failed" (don't do that inside the subprocess, as data is + # copied, not shared!)
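The "copied, not shared" caveat is a general property of process-based parallelism: each worker receives a pickled copy of its task, so state set inside the worker never reaches the parent. A minimal demonstration (illustrative, not icolos code):

import multiprocessing

def mark_done(task):
    task["done"] = True      # mutates the worker's copy only
    return task              # results must be returned explicitly

if __name__ == "__main__":
    tasks = [{"id": i, "done": False} for i in range(3)]
    with multiprocessing.Pool(processes=2) as pool:
        results = pool.map(mark_done, tasks)
    print([t["done"] for t in tasks])    # [False, False, False]
    print([r["done"] for r in results])  # [True, True, True]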
+ _ = [sub.increment_tries() for element in next_batch for sub in element] + _ = [sub.set_status_failed() for element in next_batch for sub in element] + ligprep_parallelizer.execute_parallel( + smi_ligand_path=tmp_input_smi_paths, + path_sdf_results=tmp_output_sdf_paths, + tmp_output_dir=tmp_output_dirs, + tmp_input_filter=tmp_input_filter_paths, + sublist=next_batch, + ) + + # parse the output of that particular batch and remove temporary files + self._parse_ligprep_output( + tmp_output_sdf_paths, dict_original_smiles, next_batch + ) + self._remove_temporary(tmp_output_dirs) + + # print the progress for this execution + self._log_execution_progress() + + def _parse_ligprep_output( + self, + tmp_output_sdf_paths: List[str], + dict_original_smiles: dict, + batch: List[List[Subtask]], + ): + # TODO: refactor that + def _update_subtask(sublist: List[Subtask], enum_identifier: str): + for task in sublist: + if task.data.get_index_string() == enum_identifier: + task.set_status_success() + + for i in range(len(tmp_output_sdf_paths)): + # get input and output paths and check the files are there + path_sdf_results = tmp_output_sdf_paths[i] + cur_sublist = batch[i] + if ( + not os.path.isfile(path_sdf_results) + or os.path.getsize(path_sdf_results) == 0 + ): + continue + + mol_supplier = Chem.SDMolSupplier(path_sdf_results, removeHs=False) + for mol in mol_supplier: + # Ligprep adds a "-1" to "-[N]" to the names in the variants tag; this tag is always added + # alternatively, the "_Name" property could be loaded + # TODO: add loading only the most likely tautomer here (based on _SLE.LIGPREP_TAUTOMER_PROBABILITY) + if mol is not None and mol.HasProp(_SLE.LIGPREP_VARIANTS): + identifier, _ = mol.GetProp(_SLE.LIGPREP_VARIANTS).split("-") + compound_id, enumeration_id = identifier.split(":") + compound = get_compound_by_id( + self.get_compounds(), int(compound_id) + ) + enumeration = Enumeration( + compound_object=compound, + smile=to_smiles(mol), + original_smile=dict_original_smiles[identifier], + molecule=mol, + ) + compound.add_enumeration(enumeration, auto_update=True) + _update_subtask(cur_sublist, enum_identifier=identifier) + else: + self._logger.log( + f"Skipped molecule when loading as specified property {_SLE.LIGPREP_VARIANTS} could not be found - typically, this indicates that ligprep could not embed the molecule.", + _LE.WARNING, + ) + + def _add_filtering(self, arguments: list, tmp_input_filter: str) -> list: + filter_file_settings = self.settings.additional.get(_SLE.FILTER_FILE, None) + if filter_file_settings is not None: + filter_file = open(tmp_input_filter, "w") + for key in filter_file_settings.keys(): + filter_file.write( + f"{key} {filter_file_settings[key]}\n" + ) + filter_file.close() + arguments = arguments + [_EE.LIGPREP_F, tmp_input_filter] + return arguments + + def _run_subjob( + self, + smi_ligand_path: str, + path_sdf_results: str, + tmp_output_dir: str, + tmp_input_filter: str, + sublist: List[Subtask], + ): + # 1) increase the sublist "tries" and set status to "failed" + _ = [task.increment_tries() for task in sublist] + _ = [task.set_status_failed() for task in sublist] + + # 2) change to directory, to be able to use relative paths (to compensate for Schrodinger bug with AWS) + working_dir = os.getcwd() + os.chdir(tmp_output_dir) + + # 3) prepare "Ligprep" arguments + arguments = self._prepare_ligprep_arguments() + arguments = self._add_filtering(arguments, tmp_input_filter) + arguments = arguments + [ + _EE.LIGPREP_INPUT_ISMI, + os.path.basename(smi_ligand_path), + ] 
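_add_filtering above turns the "additional" filter settings into a Ligprep filter file, one "KEY value" line per entry, handed over via the "-f" flag; a stand-alone sketch (the keys shown are placeholders, not verified Ligprep filter keywords):

# placeholder filter settings as they might appear in the step's "additional" block
filter_settings = {"Total_charge": "< 2", "Ring_count": "<= 4"}
with open("ligprep_filter.lff", "w") as lff:
    for key, value in filter_settings.items():
        lff.write(f"{key} {value}\n")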
+ arguments = arguments + [ + _EE.LIGPREP_OUTPUT_OSD, + os.path.basename(path_sdf_results), + ] + + # 4) run "Ligprep" backend and add log file to "debug" mode logging + result = self._backend_executor.execute( + command=_EE.LIGPREP, + arguments=arguments, + location=tmp_output_dir, + check=False, + ) + + self._logger.log( + f"Executed Ligprep backend (output file: {path_sdf_results}).", _LE.DEBUG + ) + path_tmp_log = os.path.join( + tmp_output_dir, + "".join( + [ + os.path.splitext(os.path.basename(path_sdf_results))[0], + _EE.LIGPREP_LOG_ENDING, + ] + ), + ) + print_log_file(path=path_tmp_log, logger=self._logger, level=_LE.DEBUG) + + # 5) revert back to working directory + os.chdir(working_dir) + + def _parse_ligprep_result( + self, sdf_output: str, enumeration: Enumeration + ) -> List[Conformer]: + charge = str( + get_charge_for_molecule(enumeration.get_molecule(), add_as_tag=False) + ) + mol_supplier = Chem.SDMolSupplier(sdf_output, removeHs=False) + conformers = [] + for mol_id, mol in enumerate(mol_supplier): + # note, that formal charge information would be kept if available before (i.e. it retains tags) + mol.SetProp(_CTE.FORMAL_CHARGE_TAG, charge) + conformers.append(Conformer(conformer=mol)) + return conformers + + def _log_execution(self, initial_enum_number: int): + number_enumerations_after = 0 + for compound in self.get_compounds(): + number_enumerations_after += len(compound.get_enumerations()) + self._logger.log( + f"Executed LigPrep for {initial_enum_number} input enumerations, resulting in {number_enumerations_after} output enumerations.", + _LE.INFO, + ) + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + self._logger.log( + f"Added enumeration {enumeration.get_index_string()} with smile {enumeration.get_smile()}.", + _LE.DEBUG, + ) + + def execute(self): + # in order to be able to efficiently execute Ligprep on the enumeration level, all of them have to be unrolled + # Note: As they retain their respective Compound object, the attribution later on is simple + all_enumerations = [] + for compound in self.get_compounds(): + all_enumerations = all_enumerations + compound.get_enumerations() + compound.clear_enumerations() + # TODO: we will use the "original_smile" of the enumeration to start the embedding; make sure it exists + + # split into sublists, according to the settings + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(all_enumerations) + + self._execute_ligprep() + self._log_execution(initial_enum_number=len(all_enumerations)) diff --git a/icolos/core/workflow_steps/schrodinger/macromodel.py b/icolos/core/workflow_steps/schrodinger/macromodel.py new file mode 100644 index 0000000..b67cda8 --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/macromodel.py @@ -0,0 +1,160 @@ +import os +import subprocess +from typing import Tuple, List + +from pydantic import BaseModel, PrivateAttr +from rdkit import Chem + +from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase +from icolos.utils.execute_external.macromodel import MacromodelExecutor + +from icolos.utils.general.molecules import get_charge_for_molecule + +from icolos.core.containers.compound import Enumeration, Conformer + +from icolos.utils.enums.program_parameters import ( + MacromodelEnum, +) +from icolos.utils.enums.step_enums import StepMacromodelEnum +from icolos.core.workflow_steps.step import _LE, _CTE +from icolos.core.step_utils.sdconvert_util import 
SDConvertUtil + +_EE = MacromodelEnum() +_MMSE = StepMacromodelEnum() + + +class StepMacromodel(StepSchrodingerBase, BaseModel): + class Config: + underscore_attrs_are_private = True + + _sdconvert_util = PrivateAttr() + + def __init__(self, **data): + super().__init__(**data) + + # initialize the executor and test availability + self._initialize_backend(executor=MacromodelExecutor) + self._check_backend_availability() + + # prepare sdconvert utility + self._sdconvert_util = SDConvertUtil( + prefix_execution=self.execution.prefix_execution, + binary_location=self.execution.binary_location, + ) + + # extend parameters with the COM file default, if not present + if _MMSE.COM_FILE not in self.settings.arguments.parameters.keys(): + self.settings.arguments.parameters[_MMSE.COM_FILE] = _MMSE.COM_FILE_DEFAULT + + def _execute_macromodel(self, com_file: str) -> subprocess.CompletedProcess: + self._logger.log( + f"Executing MacroModel backend for com_file {com_file}.", _LE.DEBUG + ) + arguments = [] + for key in self.settings.arguments.parameters.keys(): + # TODO: disentangle "special behaviour" for this key - move the com_file specification to a separate block + # in the configuration + if key != _MMSE.COM_FILE: + arguments.append(key) + arguments.append(str(self.settings.arguments.parameters[key])) + for flag in self.settings.arguments.flags: + arguments.append(str(flag)) + arguments.append(com_file) + self._apply_token_guard() + result = self._backend_executor.execute( + command=_EE.MACROMODEL, arguments=arguments, check=True + ) + return result + + def _set_formal_charge(self, parameters: dict, molecule: Chem.Mol) -> dict: + charge = get_charge_for_molecule(molecule) + parameters[_EE.XTB_CHRG] = charge + self._logger.log(f"Set charge for molecule to {charge}.", _LE.DEBUG) + return parameters + + def _prepare_file_paths(self, tmp_dir: str) -> Tuple[str, str, str]: + # generate the paths to the temporary files + mae_input = os.path.join(tmp_dir, _MMSE.MAE_INPUT) + mae_output = os.path.join(tmp_dir, _MMSE.MAE_OUTPUT) + sdf_output = os.path.join(tmp_dir, _MMSE.SDF_OUTPUT) + + return mae_input, mae_output, sdf_output + + def _prepare_settings_file(self, tmp_dir: str) -> str: + path_settings_file = os.path.join(tmp_dir, _MMSE.COM_FILE_PATH) + + # join the input and output paths (at the beginning of the COM file) and the + # settings from either the default or the configuration together + complete_com = "\n".join( + [ + os.path.join(tmp_dir, _MMSE.MAE_INPUT), + os.path.join(tmp_dir, _MMSE.MAE_OUTPUT), + self.settings.arguments.parameters[_MMSE.COM_FILE], + ] + ) + with open(path_settings_file, "w") as f: + f.writelines(complete_com) + return path_settings_file + + def _prepare_run_files( + self, tmp_dir: str, enumeration: Enumeration + ) -> Tuple[str, str, str, str, str]: + # generate the file paths (NOT populated yet) + mae_input, mae_output, sdf_output = self._prepare_file_paths(tmp_dir) + + # write the input SDF file and translate it into Schrodingers native MAE format + sdf_input = self._prepare_temp_input(tmp_dir, enumeration.get_molecule()) + self._sdconvert_util.sdf2mae(sdf_input, mae_input) + + # write out the settings file + com_file = self._prepare_settings_file(tmp_dir) + + return sdf_input, mae_input, mae_output, sdf_output, com_file + + def _parse_macromodel_result( + self, sdf_output: str, enumeration: Enumeration + ) -> List[Conformer]: + charge = str( + get_charge_for_molecule(enumeration.get_molecule(), add_as_tag=False) + ) + mol_supplier = Chem.SDMolSupplier(sdf_output, 
removeHs=False) + conformers = [] + for mol_id, mol in enumerate(mol_supplier): + # note, that formal charge information would be kept if available before (i.e. it retains tags) + mol.SetProp(_CTE.FORMAL_CHARGE_TAG, charge) + conformers.append(Conformer(conformer=mol)) + return conformers + + def execute(self): + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if not self._input_object_valid(enumeration): + continue + + # set up + tmp_dir = self._move_to_temp_dir() + + # get the paths to the MAE and SDF input and output files and the COM file (settings) + ( + sdf_input, + mae_input, + mae_output, + sdf_output, + com_file, + ) = self._prepare_run_files(tmp_dir=tmp_dir, enumeration=enumeration) + + # execute MacroModel, obtain the output SDF and switch back the working directory to what it was before + result = self._execute_macromodel(com_file=com_file) + self._sdconvert_util.mae2sdf(mae_file=mae_output, sdf_file=sdf_output) + self._restore_working_dir() + + # parse output + conformers = self._parse_macromodel_result(sdf_output, enumeration) + enumeration.clear_conformers() + enumeration.add_conformers(conformers=conformers, auto_update=True) + self._logger.log( + f"Executed MacroModel and obtained {len(conformers)} conformers for enumeration {enumeration.get_index_string()}.", + _LE.INFO, + ) + + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/schrodinger/prepwizard.py b/icolos/core/workflow_steps/schrodinger/prepwizard.py new file mode 100644 index 0000000..3db17fa --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/prepwizard.py @@ -0,0 +1,90 @@ +from icolos.utils.enums.step_enums import StepGromacsEnum, StepPrepwizEnum +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum +from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase +from icolos.utils.execute_external.schrodinger import SchrodingerExecutor +from icolos.core.containers.generic import GenericData +from pydantic import BaseModel +from copy import deepcopy +import os + +_SEE = SchrodingerExecutablesEnum() +_SGE = StepGromacsEnum() +_SPE = StepPrepwizEnum() + + +class StepPrepwizard(StepSchrodingerBase, BaseModel): + """ + Interface to Schrodinger's PrepWizard program for protein prep + """ + + def __init__(self, **data): + super().__init__(**data) + self._initialize_backend(executor=SchrodingerExecutor) + self._check_backend_availability() + + def _parse_args(self): + parameters = deepcopy(self.settings.arguments.parameters) + arguments = [] + if len(self.settings.arguments.flags) > 0: + for flag in self.settings.arguments.flags: + arguments.append(str(flag)) + if parameters: + for key in parameters.keys(): + arguments.append(key) + if parameters[key] is not None and parameters[key] != "": + arguments.append(str(parameters[key])) + input_file = self.data.generic.get_file_names_by_extension("pdb")[0] + output_file = input_file # write to the same file name to keep things tidy + arguments.append(input_file) + arguments.append(output_file) + return arguments + + def _parse_output(self, tmp_dir: str): + output_pdb = os.path.join( + tmp_dir, self.data.generic.get_file_names_by_extension("pdb")[0] + ) + with open(output_pdb, "r") as f: + data = f.read() + self.data.generic.clear_file_dict() + output_file = GenericData(file_name=_SGE.COMPLEX_PDB, file_data=data) + self.data.generic.add_file(output_file) + + def _remove_ligand(self, tmp_dir): + remove_res = self.settings.additional[_SPE.REMOVE_RES] + pdb_file = 
self.data.generic.get_argument_by_extension("pdb") + cleaned_pdb_lines = [] + # handle the ligand removal mode: strip ligands, leave cofactors + if remove_res != _SPE.LIGANDS and not isinstance(remove_res, list): + # wrap a single residue name into a list (list() would split the string into characters) + remove_res = [remove_res] + + with open(os.path.join(tmp_dir, pdb_file), "r") as f: + if remove_res == _SPE.LIGANDS: + # automatically remove ligands, keep cofactors that are specified in the enum. + for line in f.readlines(): + if line is not None and ( + line.split()[0] == "ATOM" + or any(l in line for l in _SPE.COFACTOR_IDS) + ): + cleaned_pdb_lines.append(line) + else: + for line in f.readlines(): + if not any(l in line for l in remove_res): + cleaned_pdb_lines.append(line) + + with open(os.path.join(tmp_dir, pdb_file), "w") as f: + f.writelines(cleaned_pdb_lines) + + def execute(self): + tmp_dir = self._make_tmpdir() + args = self._parse_args() + self.data.generic.write_out_all_files(tmp_dir) + if ( + _SPE.REMOVE_RES in self.settings.additional.keys() + and self.settings.additional[_SPE.REMOVE_RES] is not None + ): + self._remove_ligand(tmp_dir) + self._backend_executor.execute( + command=_SEE.PREPWIZARD, arguments=args, check=True, location=tmp_dir + ) + + self._parse_output(tmp_dir) diff --git a/icolos/core/workflow_steps/schrodinger/prime.py b/icolos/core/workflow_steps/schrodinger/prime.py new file mode 100644 index 0000000..0081f8a --- /dev/null +++ b/icolos/core/workflow_steps/schrodinger/prime.py @@ -0,0 +1,239 @@ +import os + +from pydantic import BaseModel, PrivateAttr +from rdkit import Chem +from copy import deepcopy + +from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase +from icolos.utils.execute_external.prime import PrimeExecutor +from icolos.utils.execute_external.schrodinger import SchrodingerExecutor + +from icolos.core.containers.compound import Conformer + +from icolos.utils.enums.program_parameters import PrimeEnum, SchrodingerExecutablesEnum +from icolos.utils.enums.step_enums import StepPrimeEnum, StepGlideEnum +from icolos.core.workflow_steps.step import _LE +from icolos.core.step_utils.sdconvert_util import SDConvertUtil +from icolos.core.step_utils.structcat_util import StructcatUtil +from icolos.utils.general.files_paths import gen_tmp_file +from icolos.utils.general.parallelization import SubtaskContainer, Parallelizer +from tempfile import mkdtemp + +_SPE = StepPrimeEnum() +_PE = PrimeEnum() +_SEE = SchrodingerExecutablesEnum() +_SGE = StepGlideEnum() + + +class StepPrime(StepSchrodingerBase, BaseModel): + """ + Interface to Schrodinger's Prime MM-GBSA implementation + """ + + _schrodinger_executor: SchrodingerExecutor = None + + class Config: + underscore_attrs_are_private = True + + _sdconvert_util = PrivateAttr() + _structcat_util = PrivateAttr() + + def __init__(self, **data): + super().__init__(**data) + + # initialize the executor and test availability + self._initialize_backend(executor=PrimeExecutor) + self._check_backend_availability() + self._schrodinger_executor = SchrodingerExecutor( + prefix_execution=self.execution.prefix_execution + ) + + # prepare sdconvert utility + self._sdconvert_util = SDConvertUtil( + prefix_execution=self.execution.prefix_execution, + binary_location=self.execution.binary_location, + ) + + # prepare structcat utility + self._structcat_util = StructcatUtil( + prefix_execution=self.execution.prefix_execution, + binary_location=self.execution.binary_location, + ) + + def _execute_prime(self): + # note that as the output file name cannot be set (an "-out.maegz" will be attached), this
does + # not need to be heeded here and is encoded in the fixed file name strings + + prime_parallelizer = Parallelizer(func=self._run_subjob) + n = 1 + + while self._subtask_container.done() is False: + + next_batch = self._get_sublists(get_first_n_lists=self._get_number_cores()) + + # generate lists for the next batch + tmp_dirs, complex_paths, output_sdf_paths = self._prepare_batch(next_batch) + + self._apply_token_guard() + + _ = [sub.increment_tries() for element in next_batch for sub in element] + _ = [sub.set_status_failed() for element in next_batch for sub in element] + + self._logger.log(f"Executing Prime for batch {n}", _LE.DEBUG) + + prime_parallelizer.execute_parallel( + complex_path=complex_paths, + sdf_output=output_sdf_paths, + tmp_output_dir=tmp_dirs, + ) + + self._parse_prime_output( + complex_paths, tmp_dirs, output_sdf_paths, next_batch + ) + n += 1 + + def _parse_prime_output(self, complex_paths, tmp_dirs, output_sdf_paths, batch): + # go through the batch, get the info from the output file and attach the scores to the matching conformers + scores = [] + for i in range(len(output_sdf_paths)): + cur_sublist = batch[i] + sdf_path = output_sdf_paths[i] + curr_enum = None + curr_conformer = None + mol_supplier = Chem.SDMolSupplier(sdf_path, removeHs=False) + for mol in mol_supplier: + # check whether the name corresponds to an enum or conformer + identifier = str(mol.GetProp("_Name")) + is_enum = len(identifier.split(":")) == 2 + if ( + not is_enum + ): # if we are dealing with a conformer, drop the conformer index to get the enum id + enum_index = ":".join(list(mol.GetProp("_Name").split(":"))[:-1]) + else: + enum_index = identifier + # extract the enumeration object, regardless of whether we're dealing with a conformer or an enumeration + prime_score = mol.GetProp(_SPE.MMGBSA_SCORE) + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if enumeration.get_index_string() == enum_index: + curr_enum = enumeration + # if we have a conformer, find the conformer with the right ID and append the score to the existing object + if not is_enum: + assert curr_enum is not None + for conformer in curr_enum.get_conformers(): + if conformer.get_index_string() == identifier: + curr_conformer = conformer + + else: + # if scoring an enumeration, create and attach a conformer out of it + curr_conformer = Conformer(conformer=mol) + + curr_enum.add_conformer(curr_conformer) + + assert curr_conformer is not None + # now that we have the conformer from the originals, set the Prime score + curr_conformer.get_molecule().SetProp(_SPE.MMGBSA_SCORE, prime_score) + self._logger.log( + f"Calculated dG Bind of {prime_score} for conformer {curr_conformer.get_index_string()}", + _LE.INFO, + ) + scores.append(prime_score) + + # after parsing, remove the directories + self._remove_temporary(tmp_dirs) + + # set success status + for sublist in batch: + for task in sublist: + task.set_status_success() + + def _prepare_batch(self, batch): + # generate input files for the batch and return tmpdirs + + tmp_dirs = [] + complex_paths = [] + output_sdf_paths = [] + for next_subtask_list in batch: + tmp_dir = mkdtemp() + _, tmp_input_sdf_file = gen_tmp_file(suffix=".sdf", dir=tmp_dir) + _, tmp_input_mae_file = gen_tmp_file(suffix=".maegz", dir=tmp_dir) + _, tmp_output_sdf_file = gen_tmp_file(suffix=".sdf", dir=tmp_dir) + writer = Chem.SDWriter(tmp_input_sdf_file) + for subtask in next_subtask_list: + mol = deepcopy(subtask.data.get_molecule()) + conf_id = subtask.data.get_index_string() + mol.SetProp("_Name",
conf_id) + writer.write(mol) + writer.close() + + # now we have an sdf file with all the conformers from that batch; attach the + # receptor with structcat to produce the complex in MAE format + structcat_args = [ + "-imae", + self.settings.additional[_SPE.RECEPTOR], + "-isd", + tmp_input_sdf_file, + "-omae", + tmp_input_mae_file, + ] + self._schrodinger_executor.execute( + command=_SEE.STRUCTCAT, + arguments=structcat_args, + location=tmp_dir, + check=True, + ) + + tmp_dirs.append(tmp_dir) + complex_paths.append(tmp_input_mae_file) + output_sdf_paths.append(tmp_output_sdf_file) + + return tmp_dirs, complex_paths, output_sdf_paths + + def _run_subjob(self, complex_path, sdf_output, tmp_output_dir): + + work_dir = os.getcwd() + os.chdir(tmp_output_dir) + + arguments = [complex_path, _PE.PRIME_OUTTYPE, _PE.PRIME_OUTTYPE_LIGAND] + for key in self.settings.arguments.parameters.keys(): + if key not in [_PE.PRIME_OUTTYPE, _PE.PRIME_NJOBS]: + arguments.append(key) + arguments.append(str(self.settings.arguments.parameters[key])) + for flag in self.settings.arguments.flags: + arguments.append(str(flag)) + if _PE.PRIME_WAIT not in arguments: + arguments.append(_PE.PRIME_WAIT) + + result = self._backend_executor.execute( + command=_PE.PRIME_MMGBSA, + arguments=arguments, + check=True, + location=tmp_output_dir, + ) + + output_file = os.path.splitext(complex_path)[0] + "-out.maegz" + assert os.path.isfile(output_file) + # convert the MAE ligand output back to SDF + self._sdconvert_util.mae2sdf(output_file, sdf_output) + os.chdir(work_dir) + return result + + def execute(self): + # unwrap the conformers so they can be scored efficiently in parallel: create lists of subtasks, + # each with their own files and tmpdirs, then execute them in parallel + all_conformers = [] + for compound in self.get_compounds(): + for enumeration in compound.get_enumerations(): + if enumeration.get_conformers(): + # the default running mode is to score incoming conformers without changing their configurations + for conformer in enumeration.get_conformers(): + all_conformers.append(conformer) + else: + all_conformers.append(enumeration) + + self._subtask_container = SubtaskContainer( + max_tries=self.execution.failure_policy.n_tries + ) + self._subtask_container.load_data(all_conformers) + self._execute_prime() + self._logger.log( + f"Executed Prime for {len(all_conformers)} conformers", _LE.DEBUG + ) diff --git a/icolos/core/workflow_steps/step.py b/icolos/core/workflow_steps/step.py new file mode 100644 index 0000000..657258c --- /dev/null +++ b/icolos/core/workflow_steps/step.py @@ -0,0 +1,494 @@ +import time + +from icolos.core.containers.generic import GenericContainer, GenericData +import multiprocessing +import shutil +import tempfile +from typing import Callable, List, Dict, Tuple + +from pydantic import BaseModel, PrivateAttr +from rdkit import Chem +from copy import deepcopy +import os + + +from icolos.core.step_utils.input_preparator import ( + StepData, + InputPreparator, + StepInputParameters, +) +from icolos.loggers.steplogger import StepLogger +from icolos.loggers.blank_logger import BlankLogger +from icolos.utils.enums.step_enums import StepGromacsEnum +from icolos.core.containers.compound import Compound, Conformer +from icolos.core.step_utils.step_writeout import ( + StepWriteoutParameters, + WriteOutHandler, + _SBE, +) +from icolos.utils.enums.execution_enums import ExecutionResourceEnum +from icolos.utils.execute_external.execute import Executor +from icolos.utils.general.icolos_exceptions import StepFailed + +from icolos.utils.enums.compound_enums import CompoundTagsEnum +from
icolos.utils.enums.logging_enums import LoggingConfigEnum +from icolos.utils.enums.write_out_enums import WriteOutEnum +from icolos.utils.general.files_paths import gen_tmp_file, any_in_file +from icolos.utils.general.parallelization import SubtaskContainer, Subtask +from tempfile import mkdtemp +from distutils.dir_util import copy_tree +from icolos.core.containers.compound import unroll_enumerations, unroll_conformers +from icolos.utils.general.progress_bar import get_progress_bar_string + +_LE = LoggingConfigEnum() +_WE = WriteOutEnum() +_CTE = CompoundTagsEnum() +_SGE = StepGromacsEnum() +_ERE = ExecutionResourceEnum + + +class StepFailurePolicyParameters(BaseModel): + n_tries: int = 1 + retry_wait_seconds: int = 10 + + +class StepExecutionResourceParameters(BaseModel): + partition: _ERE = _ERE.CORE + time: str = "12:00:00" + gres: str = None + mem: str = "64g" + cores: int = 8 + modules: List = [] + other_args: dict = {} + + +class StepExecutionParameters(BaseModel): + class StepExecutionParallelizationParameters(BaseModel): + cores: int = 1 + max_length_sublists: int = None + + prefix_execution: str = None + binary_location: str = None + pipe_input: str = None + parallelization: StepExecutionParallelizationParameters = ( + StepExecutionParallelizationParameters() + ) + failure_policy: StepFailurePolicyParameters = StepFailurePolicyParameters() + check_backend_availability: bool = False + job_control: StepExecutionResourceParameters = StepExecutionResourceParameters() + resource: _ERE = _ERE.LOCAL + + +class StepSettingsArgsParameters(BaseModel): + flags: List = [] + parameters: Dict = {} + + +class StepSettingsParameters(BaseModel): + arguments: StepSettingsArgsParameters = StepSettingsArgsParameters() + additional: Dict = {} + + +class StepBase(BaseModel): + step_id: str + work_dir: str = None + type: str = None + data: StepData = StepData() + input: StepInputParameters = StepInputParameters() + writeout: List[StepWriteoutParameters] = [] + execution: StepExecutionParameters = StepExecutionParameters() + settings: StepSettingsParameters = StepSettingsParameters() + + class Config: + underscore_attrs_are_private = True + + _logger = PrivateAttr() + _logger_blank = PrivateAttr() + _old_wdir = PrivateAttr() + _workflow_object = PrivateAttr() + _backend_executor: Executor = PrivateAttr() + _subtask_container: SubtaskContainer = PrivateAttr() + + def __init__(self, **data): + super().__init__(**data) + + self._logger_blank = BlankLogger() + self._old_wdir = os.getcwd() + self._workflow_object = None + self._backend_executor = None + + self._logger = StepLogger() + self._logger_blank = BlankLogger() + + # @staticmethod + def _make_tmpdir(self): + if self.work_dir is not None: + return self.work_dir + else: + self.work_dir = tempfile.mkdtemp() + return self.work_dir + + def _remove_temporary(self, paths): + if paths is not None: + if not isinstance(paths, list): + paths = [paths] + if ( + self.get_workflow_object() is None + or self.get_workflow_object().header.global_settings.remove_temporary_files + ): + for path in paths: + if os.path.isdir(path): + shutil.rmtree(path, ignore_errors=True) + elif os.path.isfile(path) and os.path.exists(path): + os.remove(path) + else: + self._logger.log( + f"Path {path} is neither a valid folder nor file path.", + _LE.WARNING, + ) + else: + self._logger.log( + f"Keeping {len(paths)} temporary file(s) / folder(s): {', '.join(paths)}", + _LE.DEBUG, + ) + + @staticmethod + def _move_to_temp_dir() -> str: + cur_tmp_dir = tempfile.mkdtemp() + 
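Since the step models above are plain pydantic classes, a JSON step definition validates directly into typed settings. A reduced sketch using the same field layout (toy class names; pydantic v1 style, as used throughout this patch):

from typing import Dict, List
from pydantic import BaseModel

class ToyArguments(BaseModel):
    flags: List = []
    parameters: Dict = {}

class ToySettings(BaseModel):
    arguments: ToyArguments = ToyArguments()
    additional: Dict = {}

config = {"arguments": {"flags": ["-WAIT"], "parameters": {"-NJOBS": 1}},
          "additional": {"grid_ids": ["apo"]}}
settings = ToySettings(**config)
print(settings.arguments.parameters)  # {'-NJOBS': 1}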
os.chdir(cur_tmp_dir) + return cur_tmp_dir + + @staticmethod + def _move_to_dir(path: str): + os.chdir(path) + + def _restore_working_dir(self): + os.chdir(self._old_wdir) + + def execute(self): + raise NotImplementedError + + def get_compound_by_name(self, name: str) -> Compound: + for compound in self.data.compounds: + if compound.get_name() == name: + return compound + + def get_compounds(self) -> List[Compound]: + return self.data.compounds + + def get_generic(self) -> GenericContainer: + return self.data.generic + + def clone_compounds(self) -> List[Compound]: + return [deepcopy(comp) for comp in self.data.compounds] + + def process_write_out(self): + # TODO: process generic data write-out + for writeout in self.writeout: + writeout_handler = WriteOutHandler(config=writeout) + writeout_handler.set_data(self.data) + writeout_handler.write() + + def get_compound_stats(self) -> Tuple[int, int, int]: + n_comp = len(self.get_compounds()) + n_enum = len(unroll_enumerations(self.get_compounds())) + n_conf = len(unroll_conformers(self.get_compounds())) + return n_comp, n_enum, n_conf + + def generate_input(self): + preparator = InputPreparator( + workflow=self.get_workflow_object(), logger=self._logger + ) + self.data, self.work_dir = preparator.generate_input( + step_input=self.input, step_type=self.type + ) + + # check for a perturbation map for fep workflows + self._logger.log( + f"Loaded {len(self.data.compounds)} compounds and {len(self.data.generic.get_flattened_files())} generic data fields for step {self.get_step_id()}.", + _LE.DEBUG, + ) + + def set_workflow_object(self, workflow_object): + self._workflow_object = workflow_object + + def get_workflow_object(self): + return self._workflow_object + + def get_step_id(self) -> str: + return self.step_id + + def set_step_id(self, step_id: str): + self.step_id = step_id + + def _initialize_backend(self, executor: Callable): + if self.execution.resource == _ERE.SLURM: + self._backend_executor = executor( + prefix_execution=self.execution.prefix_execution, + binary_location=self.execution.binary_location, + cores=self.execution.job_control.cores, + partition=self.execution.job_control.partition, + time=self.execution.job_control.time, + mem=self.execution.job_control.mem, + modules=self.execution.job_control.modules, + other_args=self.execution.job_control.other_args, + gres=self.execution.job_control.gres, + ) + else: + + self._backend_executor = executor( + prefix_execution=self.execution.prefix_execution, + binary_location=self.execution.binary_location, + ) + + def _unroll_compounds( + self, + compounds: List[Compound], + level: str = _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS, + ) -> List[Conformer]: + # TODO: move this to step_base or merge with methods from compound itself + + all_conformers = [] + for comp in compounds: + for enum in comp.get_enumerations(): + if level == _SBE.WRITEOUT_COMP_CATEGORY_ENUMERATIONS: + all_conformers.append(enum) + elif level == _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS: + for conf in enum: + all_conformers.append(conf) + return all_conformers + + def write_conformers(self, path: str): + """Convenience function for frequent conformer coordinate write-out. 
Better to use the WriteOutHandler class.""" + compounds_copy = self.clone_compounds() + params = { + _SBE.WRITEOUT_CONFIG: { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: path, + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_SDF, + }, + } + } + writeout_handler = WriteOutHandler(**params) + writeout_handler.set_data(StepData(compounds=compounds_copy)) + writeout_handler.write() + + def write_generic_by_extension(self, path: str, ext: str, join=True): + """writes all files of a specific file type to the specified directory, retaining original files names""" + for file in self.data.generic.get_files_by_extension(ext): + file.write(path, join=join) + + def write_generic_by_name(self, path, name: str): + file = self.data.generic.get_file_by_name(name) + file.write(path) + + def _check_backend_availability(self, strict=True): + if self._backend_executor is None: + raise Exception( + "Cannot check backend availability before initialization is complete." + ) + + if self.execution.check_backend_availability: + if not self._backend_executor.is_available(): + if strict: + raise StepFailed( + f"Cannot initialize backend for step {self.step_id} - abort." + ) + else: + self._logger.log( + f"Backend availability check failed, proceeding anyways.", + _LE.WARNING, + ) + else: + self._logger.log(f"Checked backend availability - valid.", _LE.DEBUG) + + def _input_object_valid(self, obj) -> bool: + if obj.get_molecule() is None or not isinstance(obj.get_molecule(), Chem.Mol): + self._logger.log( + f"Object {obj.get_index_string()} skipped - no valid molecule.", + _LE.WARNING, + ) + return False + return True + + def _input_object_empty(self, obj) -> bool: + if obj.empty(): + self._logger.log( + f"Object {obj.get_index_string()} is skipped (empty).", _LE.WARNING + ) + return True + return False + + # TODO: REMOVE THIS FUNCTION (see: write_molecule_to_sdf()) + def _prepare_temp_input(self, tmp_dir: str, molecule: Chem.Mol) -> str: + _, tmp_sdf_path = gen_tmp_file(suffix=".sdf", dir=tmp_dir) + if molecule is None or not isinstance(molecule, Chem.Mol): + raise ValueError( + "Function requires input attribute to be an RDkit molecule." 
+ ) + writer = Chem.SDWriter(tmp_sdf_path) + writer.write(molecule) + writer.close() + self._logger.log(f"Wrote input molecule to file {tmp_sdf_path}.", _LE.DEBUG) + return tmp_sdf_path + + def _get_sublists(self, get_first_n_lists: int = None) -> List[List[Subtask]]: + number_cores = self._get_number_cores() + + # decide how to slice the ligand list depending on whether a maximum length is defined or not + if self.execution.parallelization.max_length_sublists is not None: + slice_size = min( + max(self.execution.parallelization.max_length_sublists, 1), + len(self._subtask_container), + ) + return self._subtask_container.get_sublists( + partitions=None, + slice_size=slice_size, + get_first_n_lists=get_first_n_lists, + ) + else: + # split the ligands into as many cores as available + partitions = min(number_cores, len(self._subtask_container)) + return self._subtask_container.get_sublists( + partitions=partitions, + slice_size=None, + get_first_n_lists=get_first_n_lists, + ) + + def _get_number_cores(self): + # prepare the parallelization and set the number of cores to be used + cores = self.execution.parallelization.cores + if cores == 0: + cores = 1 + elif cores < 0: + # subtract the number of cores (neg. value, thus add up) from total number of cores, e.g. -1 will + # use all available cores minus 1 + cores = multiprocessing.cpu_count() + cores + return cores + + def _print_log_file(self, path: str): + if os.path.isfile(path): + with open(path, "r") as log_file: + log_file_raw = log_file.readlines() + self._logger.log(f"Printing log file {path}:\n", _LE.DEBUG) + for line in log_file_raw: + self._logger_blank.log(line.rstrip("\n"), _LE.DEBUG) + self._logger_blank.log("", _LE.DEBUG) + self._logger.log("--- End file", _LE.DEBUG) + + def _add_data_to_generic(self, file, data, extension=None): + """Write data from arbitrary file to generic container class""" + file_name = file.split("/")[-1] + # file types where they can be passed as arguments in a subsequent step + # TODO: this is not maintainable! + file_tag = ( + True + if file.endswith((".gro", "topol.top", "tpr", "fmp", "edge")) + else False + ) + file = GenericData( + file_name=file_name, file_data=data, argument=file_tag, extension=extension + ) + self.data.generic.add_file(file) + + def _parse_output( + self, + tmp_dir, + exclusion_list=( + "#", + "AC", + "AC0", + "INF", + "hashed", + "metadata", + "timekeys", + "000000", + ), + ): + """Generic method for parsing generic writeout, can be overwritten in child classes""" + self.data.generic.clear_file_dict() + file_list = [os.path.join(tmp_dir, f) for f in os.listdir(tmp_dir)] + for file in file_list: + if os.path.isfile(file) and not file.endswith(exclusion_list): + try: + with open(file, "r") as f: + data = f.read() + binary = False + except UnicodeDecodeError: + with open(file, "rb") as f: + data = f.read() + binary = True + # work out if we handle the data or just the path to it on disk + file_size = os.stat(file).st_size + if file_size > float(_SBE.FILE_SIZE_THRESHOLD.value): + # do not write to the dict - file is too large to store in memory + _, tmp_path = gen_tmp_file(suffix="." 
+ str(file).split(".")[-1]) + self._logger.log( + f"Large file detected, storing at {tmp_path}", _LE.INFO + ) + if binary: + with open(tmp_path, "wb") as f: + f.write(data) + else: + with open(tmp_path, "w") as f: + f.write(data) + data = tmp_path + + self._add_data_to_generic(file, data) + self._logger.log(f"Stored data for file {file}", _LE.DEBUG) + elif os.path.isdir(file): + tmp_dir = mkdtemp() + copy_tree(file, tmp_dir) + self._add_data_to_generic(file=file, data=tmp_dir, extension="dir") + + # we have picked up a directory, we want the entire contents copied somewhere + + def _wait_until_file_generation( + self, + path, + path_log=None, + interval_sec=1, + maximum_sec=None, + success_strings: set = set(), + fail_strings: set = set(), + ) -> bool: + # TODO: Refactor that without breaking the Glide dependency. + counter = 0 + while not os.path.exists(path): + # wait for an interval + time.sleep(interval_sec) + counter = counter + 1 + + # if a Glide logfile path has been specified, check, whether critical messages indicating an abort are there + # note, that we return "True" to indicate that the "file generation" has nevertheless been completed + if path_log is not None: + if any_in_file(path_log, fail_strings): + self._logger.log( + f"A critical error occurred in sublist execution.", _LE.WARNING + ) + self._print_log_file(path_log) + return True + if any_in_file(path_log, success_strings): + # log file indicates job is done; give a bit of leeway to ensure the writing is done + time.sleep(3) + break + + # if there's time left, proceed + if maximum_sec is not None and counter * interval_sec >= maximum_sec: + break + if os.path.exists(path): + return True + else: + return False + + def _log_execution_progress(self): + number_tasks_done = len(self._subtask_container.get_done_tasks()) + number_tasks_total = len(self._subtask_container.subtasks) + self._logger.log( + get_progress_bar_string(number_tasks_done, number_tasks_total, length=65), + _LE.INFO, + ) diff --git a/icolos/core/workflow_steps/structure_prediction/__init__.py b/icolos/core/workflow_steps/structure_prediction/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/structure_prediction/disicl.py b/icolos/core/workflow_steps/structure_prediction/disicl.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/core/workflow_steps/structure_prediction/dssp.py b/icolos/core/workflow_steps/structure_prediction/dssp.py new file mode 100644 index 0000000..9a5151e --- /dev/null +++ b/icolos/core/workflow_steps/structure_prediction/dssp.py @@ -0,0 +1,56 @@ +from typing import List +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.execute_external.execute import Executor +from pydantic import BaseModel +from icolos.utils.enums.step_enums import StepDSSPEnum +from icolos.utils.enums.program_parameters import DSSPEnum +import os + + +_SDE = StepDSSPEnum() +_DE = DSSPEnum() + + +class StepDSSP(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + self._initialize_backend(executor=Executor) + + def _construct_arguments(self, tmp_dir: str, file: str) -> List: + args = [] + for flag in self.settings.arguments.flags: + args.append(flag) + for key, value in self.settings.arguments.parameters.items(): + args.append(key) + args.append(value) + + # set the input and output files + args.append(file) + output = f"dssp_output_{file.split('.')[0]}.txt" + args.append(output) + return args + + def _parse_output(self, tmp_dir: str) -> None: + for file in 
[f for f in os.listdir(tmp_dir) if f.endswith("txt")]: + with open(os.path.join(tmp_dir, file), "r") as f: + self._add_data_to_generic(file, f.read()) + + def execute(self): + """ + Executes DSSP on a set of input structures + """ + + tmp_dir = self._make_tmpdir() + self.data.generic.write_out_all_files(tmp_dir) + + file_list = self.data.generic.get_file_names_by_extension(ext="pdb") + + for file in file_list: + arguments = self._construct_arguments(tmp_dir, file) + self._backend_executor.execute( + command=_DE.MKDSSP, arguments=arguments, check=True, location=tmp_dir + ) + + self._parse_output(tmp_dir) + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/structure_prediction/pdb_fixer.py b/icolos/core/workflow_steps/structure_prediction/pdb_fixer.py new file mode 100644 index 0000000..9d46ed0 --- /dev/null +++ b/icolos/core/workflow_steps/structure_prediction/pdb_fixer.py @@ -0,0 +1,66 @@ +# implement pdbfixer as a FOSS alternative to proteinprep +from icolos.utils.enums.step_enums import StepPdbFixerEnum +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.program_parameters import PdbFixerEnum +from icolos.utils.execute_external.execute import Executor +from pydantic import BaseModel +from pdbfixer.pdbfixer import PDBFixer +import os + + +_SFE = StepPdbFixerEnum() +_FE = PdbFixerEnum() + + +class StepPdbFixer(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + self._initialize_backend(executor=Executor) + + def _parse_arguments(self): + default_flags = [ + "--replace-nonstandard", + "--add-residues", + ] + default_params = { + "--ph": "7.0", + "--add-atoms": "all", + "--keep-heterogens": "all", + } + arguments = [] + for arg in self.settings.arguments.flags: + arguments.append(arg) + for key, value in self.settings.arguments.parameters.items(): + formatted_arg = f"{key}={value}" + arguments.append(formatted_arg) + for key in default_flags: + if key not in self.settings.arguments.flags: + arguments.append(key) + for key, value in default_params.items(): + if key not in self.settings.arguments.parameters.keys(): + formatted_arg = f"{key}={value}" + arguments.append(formatted_arg) + return arguments + + def execute(self): + + tmp_dir = self._make_tmpdir() + + self.data.generic.write_out_all_files(tmp_dir) + pdb_files = self.data.generic.get_file_names_by_extension("pdb") + + arguments = self._parse_arguments() + + for file in pdb_files: + path = os.path.join(tmp_dir, file) + # build a fresh argument list for each file; extending the shared list would + # accumulate input paths and "--output" flags across iterations + file_arguments = [path] + arguments + ["--output", path] + + self._backend_executor.execute( + command=_FE.FIXER, arguments=file_arguments, location=tmp_dir, check=True + ) + + self._parse_output(tmp_dir) + + self._remove_temporary(tmp_dir) diff --git a/icolos/core/workflow_steps/structure_prediction/peptide_embedder.py b/icolos/core/workflow_steps/structure_prediction/peptide_embedder.py new file mode 100644 index 0000000..f0b6a53 --- /dev/null +++ b/icolos/core/workflow_steps/structure_prediction/peptide_embedder.py @@ -0,0 +1,36 @@ +from pydantic import BaseModel +from icolos.core.workflow_steps.step import StepBase +from Bio import SeqIO +from Bio.PDB import PDBIO +import os +import PeptideBuilder +from PeptideBuilder import Geometry + + +class StepPeptideEmbedder(StepBase, BaseModel): + def __init__(self, **data): + super().__init__(**data) + + def execute(self): + # use the PeptideBuilder python library to build a rough peptide structure using + # sensible psi/phi angles etc., for subsequent simulation + +
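The PeptideBuilder calls used in the loop below can be exercised stand-alone; this sketch builds a four-residue peptide with the library's default backbone geometry (the sequence is arbitrary):

from Bio.PDB import PDBIO
import PeptideBuilder
from PeptideBuilder import Geometry

# one Geometry per residue, carrying sensible default phi/psi angles
geometries = [Geometry.geometry(aa) for aa in "ACDE"]
structure = PeptideBuilder.make_structure_from_geos(geometries)

io = PDBIO()
io.set_structure(structure)
io.save("peptide.pdb")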
diff --git a/icolos/core/workflow_steps/structure_prediction/peptide_embedder.py b/icolos/core/workflow_steps/structure_prediction/peptide_embedder.py
new file mode 100644
index 0000000..f0b6a53
--- /dev/null
+++ b/icolos/core/workflow_steps/structure_prediction/peptide_embedder.py
@@ -0,0 +1,36 @@
+from pydantic import BaseModel
+from icolos.core.workflow_steps.step import StepBase
+from Bio import SeqIO
+from Bio.PDB import PDBIO
+import os
+import PeptideBuilder
+from PeptideBuilder import Geometry
+
+
+class StepPeptideEmbedder(StepBase, BaseModel):
+    def __init__(self, **data):
+        super().__init__(**data)
+
+    def execute(self):
+        # use the PeptideBuilder python library to build a rough peptide structure
+        # with sensible psi/phi angles etc., for subsequent simulation
+
+        tmp_dir = self._make_tmpdir()
+        self.data.generic.write_out_all_files(tmp_dir)
+        # extract the peptide sequences from the provided FASTA file
+        fasta_file = self.data.generic.get_argument_by_extension("fasta")
+
+        sequences = list(SeqIO.parse(os.path.join(tmp_dir, fasta_file), format="fasta"))
+
+        for idx, seq in enumerate(sequences):
+            geom = [Geometry.geometry(aa) for aa in seq.seq]
+            structure = PeptideBuilder.make_structure_from_geos(geom)
+
+            out = PDBIO()
+            out.set_structure(structure)
+            # TODO: find a better naming strategy than this
+            out.save(os.path.join(tmp_dir, f"sequence_{idx}.pdb"))
+
+        self._parse_output(tmp_dir)
+
+        self._remove_temporary(tmp_dir)
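PeptideBuilder assembles the chain from per-residue Geometry objects, so backbone dihedrals can be adjusted before the structure is built. A minimal sketch, assuming the Geometry attributes phi and psi_im1 from the PeptideBuilder documentation (sequence and angle values are hypothetical):

    from Bio.PDB import PDBIO
    import PeptideBuilder
    from PeptideBuilder import Geometry

    geos = []
    for aa in "ACDEF":          # arbitrary example sequence
        geo = Geometry.geometry(aa)
        geo.phi = -120.0        # force an extended backbone
        geo.psi_im1 = 140.0
        geos.append(geo)
    structure = PeptideBuilder.make_structure_from_geos(geos)
    io = PDBIO()
    io.set_structure(structure)
    io.save("extended_ACDEF.pdb")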
diff --git a/icolos/core/workflow_steps/structure_prediction/rosetta_abinitio.py b/icolos/core/workflow_steps/structure_prediction/rosetta_abinitio.py
new file mode 100644
index 0000000..385fdbb
--- /dev/null
+++ b/icolos/core/workflow_steps/structure_prediction/rosetta_abinitio.py
@@ -0,0 +1,15 @@
+from icolos.core.workflow_steps.step import StepBase
+from pydantic import BaseModel
+from icolos.utils.execute_external.rosetta import RosettaExecutor
+
+# Wrapper for Rosetta ab initio structure prediction.
+# Note that execution is finicky and requires proper setup: we run everything
+# locally, since public web servers are not an option for us, and some
+# configuration is needed to get make_fragments.pl to run with all its
+# dependencies in place.
+
+
+class StepRosettaAbinitio(StepBase, BaseModel):
+    def __init__(self, **data):
+        super().__init__(**data)
+
+        self._initialize_backend(executor=RosettaExecutor)
diff --git a/icolos/loggers/__init__.py b/icolos/loggers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/icolos/loggers/agentlogger.py b/icolos/loggers/agentlogger.py
new file mode 100644
index 0000000..a77754f
--- /dev/null
+++ b/icolos/loggers/agentlogger.py
@@ -0,0 +1,12 @@
+import logging
+
+from icolos.loggers.base_logger import BaseLogger
+
+
+class AgentLogger(BaseLogger):
+    def __init__(self):
+        super().__init__()
+
+    def _initialize_logger(self):
+        logger = logging.getLogger(self._LE.LOGGER_AGENT)
+        return logger
diff --git a/icolos/loggers/base_logger.py b/icolos/loggers/base_logger.py
new file mode 100644
index 0000000..a3e0254
--- /dev/null
+++ b/icolos/loggers/base_logger.py
@@ -0,0 +1,27 @@
+from abc import ABC, abstractmethod
+
+from icolos.utils.enums.logging_enums import LoggingConfigEnum
+
+
+class BaseLogger(ABC):
+    def __init__(self):
+        self._LE = LoggingConfigEnum()
+        self._logger = self._initialize_logger()
+
+    def log(self, message: str, level: str):
+        if level == self._LE.DEBUG:
+            self._logger.debug(message)
+        elif level == self._LE.INFO:
+            self._logger.info(message)
+        elif level == self._LE.WARNING:
+            self._logger.warning(message)
+        elif level == self._LE.ERROR:
+            self._logger.error(message)
+        elif level == self._LE.EXCEPTION:
+            self._logger.exception(message)
+        else:
+            raise ValueError("Logger level not supported.")
+
+    @abstractmethod
+    def _initialize_logger(self):
+        raise NotImplementedError("Overwrite this method in child classes.")
diff --git a/icolos/loggers/blank_logger.py b/icolos/loggers/blank_logger.py
new file mode 100644
index 0000000..71d3708
--- /dev/null
+++ b/icolos/loggers/blank_logger.py
@@ -0,0 +1,14 @@
+import logging
+
+from icolos.loggers.base_logger import BaseLogger
+
+
+class BlankLogger(BaseLogger):
+    """This logger serves as a "verbatim" interface."""
+
+    def __init__(self):
+        super().__init__()
+
+    def _initialize_logger(self):
+        logger = logging.getLogger(self._LE.LOGGER_BLANK)
+        return logger
diff --git a/icolos/loggers/entrypoint_logger.py b/icolos/loggers/entrypoint_logger.py
new file mode 100644
index 0000000..8406660
--- /dev/null
+++ b/icolos/loggers/entrypoint_logger.py
@@ -0,0 +1,12 @@
+import logging
+
+from icolos.loggers.base_logger import BaseLogger
+
+
+class EntryPointLogger(BaseLogger):
+    def __init__(self):
+        super().__init__()
+
+    def _initialize_logger(self):
+        logger = logging.getLogger(self._LE.LOGGER_ENTRYPOINT)
+        return logger
diff --git a/icolos/loggers/iologger.py b/icolos/loggers/iologger.py
new file mode 100644
index 0000000..86cec06
--- /dev/null
+++ b/icolos/loggers/iologger.py
@@ -0,0 +1,12 @@
+import logging
+
+from icolos.loggers.base_logger import BaseLogger
+
+
+class IOLogger(BaseLogger):
+    def __init__(self):
+        super().__init__()
+
+    def _initialize_logger(self):
+        logger = logging.getLogger(self._LE.LOGGER_IO)
+        return logger
diff --git a/icolos/loggers/logger_utils.py b/icolos/loggers/logger_utils.py
new file mode 100644
index 0000000..6133f15
--- /dev/null
+++ b/icolos/loggers/logger_utils.py
@@ -0,0 +1,4 @@
+def log_multiline_string(logger, level: str, multi_line_string: str):
+    lines = multi_line_string.split("\n")
+    for line in lines:
+        logger.log(line, level)
diff --git a/icolos/loggers/steplogger.py b/icolos/loggers/steplogger.py
new file mode 100644
index 0000000..a83d64e
--- /dev/null
+++ b/icolos/loggers/steplogger.py
@@ -0,0 +1,12 @@
+import logging
+
+from icolos.loggers.base_logger import BaseLogger
+
+
+class StepLogger(BaseLogger):
+    def __init__(self):
+        super().__init__()
+
+    def _initialize_logger(self):
+        logger = logging.getLogger(self._LE.LOGGER_STEP)
+        return logger
diff --git a/icolos/scripts/__init__.py b/icolos/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/icolos/scripts/cli.py b/icolos/scripts/cli.py
new file mode 100644
index 0000000..8331502
--- /dev/null
+++ b/icolos/scripts/cli.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import json
+import argparse
+
+from icolos.core.composite_agents.workflow import WorkFlow
+
+from icolos.loggers.entrypoint_logger import EntryPointLogger
+
+from icolos.utils.enums.composite_agents_enums import WorkflowEnum
+from icolos.utils.enums.logging_enums import LoggingConfigEnum
+from icolos.utils.enums.entry_points import ExecutorEnum
+
+from icolos.utils.entry_point_functions.logging_helper_functions import (
+    initialize_logging,
+)
+from icolos.utils.entry_point_functions.parsing_functions import parse_header
+from icolos.utils.general.files_paths import attach_root_path
+
+
+class IcolosCLI:
+    def __init__(self) -> None:
+        # enums
+        _LE = LoggingConfigEnum()
+        _EE = ExecutorEnum()
+        _WE = WorkflowEnum()
+
+        # initialize logger
+        logger = EntryPointLogger()
+
+        # get the input parameters and parse them
+        parser = argparse.ArgumentParser(
+            description='Implements entry point for the "Icolos" workflow class.'
+        )
+        parser.add_argument(
+            "-conf",
+            type=str,
+            default=None,
+            help="A path to a workflow's configuration file (JSON dictionary) that is to be executed.",
+        )
+        parser.add_argument(
+            "-debug",
+            action="store_true",
+            help="Set this flag to activate the inbuilt debug logging mode (this overrides the default logging configuration).",
+        )
+        parser.add_argument(
+            "--global_variables",
+            nargs="+",
+            default=None,
+            type=str,
+            help='List of strings, setting global variables with key and value, e.g. "root:/path/to/root".',
+        )
+        parser.add_argument(
+            "--global_settings",
+            nargs="+",
+            default=None,
+            type=str,
+            help='List of strings, setting global settings with key and value, e.g. "remove_temporary:False".',
+        )
+        args, args_unk = parser.parse_known_args()
+
+        if args.conf is None or not os.path.isfile(args.conf):
+            raise Exception(
+                'Parameter "-conf" must be a relative or absolute path to a configuration (JSON) file.'
+            )
+
+        # load configuration
+        with open(args.conf) as file:
+            conf = file.read().replace("\r", "").replace("\n", "")
+            conf = json.loads(conf)
+
+        # set the logging configuration according to parameters
+        log_conf = attach_root_path(_LE.PATH_CONFIG_DEFAULT)
+        if args.debug:
+            log_conf = attach_root_path(_LE.PATH_CONFIG_DEBUG)
+        logger = initialize_logging(log_conf_path=log_conf, workflow_conf=conf)
+
+        # update global variables and settings
+        conf = parse_header(
+            conf=conf,
+            args=args,
+            entry_point_path=os.path.realpath(__file__),
+            logger=logger,
+        )
+
+        # generate workflow object
+        workflow = WorkFlow(**conf[_WE.WORKFLOW])
+        workflow.initialize()
+
+        # execute the whole workflow
+        workflow.execute()
+
+        sys.exit(0)
+
+
+def entry_point():
+    IcolosCLI()
+
+
+if __name__ == "__main__":
+    entry_point()
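For reference, the entry point above reduces to a handful of library calls. A minimal programmatic sketch that skips logging setup and header parsing ("workflow.json" stands in for any Icolos configuration file):

    import json

    from icolos.core.composite_agents.workflow import WorkFlow

    with open("workflow.json") as f:
        conf = json.load(f)

    workflow = WorkFlow(**conf["workflow"])  # the "workflow" block, cf. WorkflowEnum
    workflow.initialize()
    workflow.execute()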
"root:/path/to/root".', + ) + parser.add_argument( + "--global_settings", + nargs="+", + default=None, + type=str, + help='List of strings, setting global settings with key and value, e.g. "remove_temporary:False".', + ) + args, args_unk = parser.parse_known_args() + + if args.conf is None or not os.path.isfile(args.conf): + raise Exception( + 'Parameter "-conf" must be a relative or absolute path to a configuration (JSON) file.' + ) + + # load configuration + with open(args.conf) as file: + conf = file.read().replace("\r", "").replace("\n", "") + conf = json.loads(conf) + + # set the logging configuration according to parameters + log_conf = attach_root_path(_LE.PATH_CONFIG_DEFAULT) + if args.debug: + log_conf = attach_root_path(_LE.PATH_CONFIG_DEBUG) + logger = initialize_logging(log_conf_path=log_conf, workflow_conf=conf) + + # update global variables and settings + conf = parse_header( + conf=conf, + args=args, + entry_point_path=os.path.realpath(__file__), + logger=logger, + ) + + # generate workflow object + workflow = WorkFlow(**conf[_WE.WORKFLOW]) + workflow.initialize() + + # execute the whole workflow + workflow.execute() + + sys.exit(0) + + +def entry_point(): + IcolosCLI() + + +if __name__ == "__main__": + entry_point() diff --git a/icolos/utils/__init__.py b/icolos/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/utils/constants.py b/icolos/utils/constants.py new file mode 100644 index 0000000..77dffc8 --- /dev/null +++ b/icolos/utils/constants.py @@ -0,0 +1,2 @@ +CONSTANT_T = 298 # temperature in Kelvin +CONSTANT_KB = 0.00198720425864 # Boltzmann constant in kcal / mol / Kelvin diff --git a/icolos/utils/entry_point_functions/__init__.py b/icolos/utils/entry_point_functions/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/utils/entry_point_functions/logging_helper_functions.py b/icolos/utils/entry_point_functions/logging_helper_functions.py new file mode 100644 index 0000000..46dce4d --- /dev/null +++ b/icolos/utils/entry_point_functions/logging_helper_functions.py @@ -0,0 +1,29 @@ +import json +import logging.config as logging_config +from icolos.loggers.entrypoint_logger import EntryPointLogger +from icolos.utils.enums.composite_agents_enums import WorkflowEnum +from icolos.utils.enums.logging_enums import LoggingConfigEnum + +from icolos.utils.general.convenience_functions import * + +_WE = WorkflowEnum() +_LE = LoggingConfigEnum() + + +def initialize_logging(log_conf_path: str, workflow_conf: dict) -> EntryPointLogger: + with open(log_conf_path, "r") as f: + log_conf_dict = json.load(f) + header = nested_get(workflow_conf, [_WE.WORKFLOW, _WE.HEADER], default={}) + if in_keys(header, [_WE.LOGGING, _WE.LOGGING_LOGFILE]): + try: + log_conf_dict["handlers"]["file_handler"]["filename"] = nested_get( + header, [_WE.LOGGING, _WE.LOGGING_LOGFILE], None + ) + log_conf_dict["handlers"]["file_handler_blank"]["filename"] = nested_get( + header, [_WE.LOGGING, _WE.LOGGING_LOGFILE], None + ) + except KeyError: + pass + logging_config.dictConfig(log_conf_dict) + logger = EntryPointLogger() + return logger diff --git a/icolos/utils/entry_point_functions/parsing_functions.py b/icolos/utils/entry_point_functions/parsing_functions.py new file mode 100644 index 0000000..adcccd4 --- /dev/null +++ b/icolos/utils/entry_point_functions/parsing_functions.py @@ -0,0 +1,75 @@ +import os + +from icolos.loggers.base_logger import BaseLogger + +from icolos.utils.enums.composite_agents_enums import WorkflowEnum +from 
diff --git a/icolos/utils/entry_point_functions/__init__.py b/icolos/utils/entry_point_functions/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/icolos/utils/entry_point_functions/logging_helper_functions.py b/icolos/utils/entry_point_functions/logging_helper_functions.py
new file mode 100644
index 0000000..46dce4d
--- /dev/null
+++ b/icolos/utils/entry_point_functions/logging_helper_functions.py
@@ -0,0 +1,29 @@
+import json
+import logging.config as logging_config
+from icolos.loggers.entrypoint_logger import EntryPointLogger
+from icolos.utils.enums.composite_agents_enums import WorkflowEnum
+from icolos.utils.enums.logging_enums import LoggingConfigEnum
+
+from icolos.utils.general.convenience_functions import *
+
+_WE = WorkflowEnum()
+_LE = LoggingConfigEnum()
+
+
+def initialize_logging(log_conf_path: str, workflow_conf: dict) -> EntryPointLogger:
+    with open(log_conf_path, "r") as f:
+        log_conf_dict = json.load(f)
+    header = nested_get(workflow_conf, [_WE.WORKFLOW, _WE.HEADER], default={})
+    if in_keys(header, [_WE.LOGGING, _WE.LOGGING_LOGFILE]):
+        try:
+            log_conf_dict["handlers"]["file_handler"]["filename"] = nested_get(
+                header, [_WE.LOGGING, _WE.LOGGING_LOGFILE], None
+            )
+            log_conf_dict["handlers"]["file_handler_blank"]["filename"] = nested_get(
+                header, [_WE.LOGGING, _WE.LOGGING_LOGFILE], None
+            )
+        except KeyError:
+            pass
+    logging_config.dictConfig(log_conf_dict)
+    logger = EntryPointLogger()
+    return logger
diff --git a/icolos/utils/entry_point_functions/parsing_functions.py b/icolos/utils/entry_point_functions/parsing_functions.py
new file mode 100644
index 0000000..adcccd4
--- /dev/null
+++ b/icolos/utils/entry_point_functions/parsing_functions.py
@@ -0,0 +1,75 @@
+import os
+
+from icolos.loggers.base_logger import BaseLogger
+
+from icolos.utils.enums.composite_agents_enums import WorkflowEnum
+from icolos.utils.enums.entry_points import ExecutorEnum
+from icolos.utils.enums.logging_enums import LoggingConfigEnum
+
+_WE = WorkflowEnum()
+_LE = LoggingConfigEnum()
+_EE = ExecutorEnum()
+
+
+def parse_global(g_input, logger: BaseLogger) -> dict:
+    if g_input is not None:
+        if not isinstance(g_input, list):
+            g_input = [g_input]
+        g_vars = {}
+        for new_var in g_input:
+            parts = new_var.split(":")
+            if len(parts) != 2:
+                logger.log(
+                    f"Ignoring global input {new_var} set on the command line, as it must have exactly one key and one value, separated by ':'.",
+                    _LE.WARNING,
+                )
+                continue
+            g_vars[parts[0]] = parts[1]
+            logger.log(
+                f'Parsed global input "{parts[0]}" (value: "{parts[1]}").', _LE.DEBUG
+            )
+        return g_vars
+    else:
+        return {}
+
+
+def add_global(configuration: dict, g_vars: dict, field: str) -> dict:
+    """This function adds (and overwrites) values for global settings and variables. Parameter "field" selects
+    which key is to be used in the header region."""
+    header = configuration[_WE.WORKFLOW][_WE.HEADER]
+    if field not in header.keys():
+        header[field] = {}
+    for key, value in g_vars.items():
+        header[field][key] = value
+    return configuration
+
+
+def get_runtime_global_variables(args_conf: str, entry_point_path: str) -> dict:
+    return {  # current workdir
+        _EE.RUNTIME_GLOBAL_VARIABLE_WORKDIR: os.getcwd(),
+        # directory where the entry point lies
+        _EE.RUNTIME_GLOBAL_VARIABLE_ENTRYPOINTDIR: os.path.dirname(entry_point_path),
+        # directory where the JSON lies
+        _EE.RUNTIME_GLOBAL_VARIABLE_CONFIGDIR: os.path.dirname(
+            os.path.abspath(args_conf)
+        ),
+    }
+
+
+def parse_header(conf: dict, args, entry_point_path: str, logger: BaseLogger) -> dict:
+    # parse global variables from the command line
+    global_vars_CLI = parse_global(g_input=args.global_variables, logger=logger)
+    conf = add_global(conf, global_vars_CLI, _WE.GLOBAL_VARIABLES)
+
+    # add run-specific global variables (the current directory, the JSON's directory, ...)
+    conf = add_global(
+        conf,
+        get_runtime_global_variables(args.conf, entry_point_path),
+        _WE.GLOBAL_VARIABLES,
+    )
+
+    # update global settings; if they are not supported, pydantic will complain later on
+    # TODO: at the moment the implementation ignores stuff that is not understood (e.g.
when a typo occurs); this should fail + global_settings_CLI = parse_global(g_input=args.global_settings, logger=logger) + conf = add_global(conf, global_settings_CLI, _WE.GLOBAL_SETTINGS) + return conf diff --git a/icolos/utils/enums/__init__.py b/icolos/utils/enums/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/utils/enums/composite_agents_enums.py b/icolos/utils/enums/composite_agents_enums.py new file mode 100644 index 0000000..7f56949 --- /dev/null +++ b/icolos/utils/enums/composite_agents_enums.py @@ -0,0 +1,59 @@ +class BaseAgentEnum: + + HEADER = "header" + STEPS = "steps" + + # header + # --------- + ID = "id" + DESCRIPTION = "description" + GLOBAL_VARIABLES = "global_variables" + GLOBAL_SETTINGS = "global_settings" + LOGGING = "logging" + LOGGING_LOGFILE = "logfile" + + # exporting environment variables + ENVIRONMENT = "environment" + ENVIRONMENT_EXPORT = "export" + ENVIRONMENT_EXPORT_KEY = "key" + ENVIRONMENT_EXPORT_VALUE = "value" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class WorkflowEnum(BaseAgentEnum): + + WORKFLOW = "workflow" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class SchedulerEnum(BaseAgentEnum): + + SCHEDULER = "scheduler" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") diff --git a/icolos/utils/enums/compound_enums.py b/icolos/utils/enums/compound_enums.py new file mode 100644 index 0000000..46025c5 --- /dev/null +++ b/icolos/utils/enums/compound_enums.py @@ -0,0 +1,56 @@ +class CompoundTagsEnum: + + CONFORMER_ENERGY_TAG = "conformer_energy" + FORMAL_CHARGE_TAG = "formal_charge" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class CompoundContainerEnum: + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class EnumerationContainerEnum: + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class ConformerContainerEnum: + + EXTRA_DATA_COSMOFILE = "cosmo_file" + EXTRA_DATA_COORDFILE = "coord_file" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") diff --git a/icolos/utils/enums/entry_points.py b/icolos/utils/enums/entry_points.py new file mode 100644 index 
0000000..d912d52 --- /dev/null +++ b/icolos/utils/enums/entry_points.py @@ -0,0 +1,15 @@ +class ExecutorEnum: + + RUNTIME_GLOBAL_VARIABLE_WORKDIR = "work_dir" + RUNTIME_GLOBAL_VARIABLE_ENTRYPOINTDIR = "entrypoint_dir" + RUNTIME_GLOBAL_VARIABLE_CONFIGDIR = "config_dir" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") diff --git a/icolos/utils/enums/execution_enums.py b/icolos/utils/enums/execution_enums.py new file mode 100644 index 0000000..9a51932 --- /dev/null +++ b/icolos/utils/enums/execution_enums.py @@ -0,0 +1,13 @@ +from enum import Enum + + +class ExecutionResourceEnum(str, Enum): + LOCAL = "local" + SLURM = "slurm" + PARTITION = "partition" + TIME = "time" + GRES = "gres" + MEM = "mem" + CORES = "cores" + CORE = "core" + GPU = "gpu" diff --git a/icolos/utils/enums/flow_control_enums.py b/icolos/utils/enums/flow_control_enums.py new file mode 100644 index 0000000..50b7093 --- /dev/null +++ b/icolos/utils/enums/flow_control_enums.py @@ -0,0 +1,15 @@ +from icolos.core.workflow_steps.prediction.active_learning import StepActiveLearning +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.core.flow_control.iterator import StepIterator + +_SBE = StepBaseEnum + + +class FlowControlInitializationEnum: + # These steps are responsible for initializing other steps as part of their execution + # Keep these separate to the main pool of steps to avoid circular imports + + FLOW_CONTROL_INIT_DICT = { + _SBE.STEP_ITERATOR: StepIterator, + _SBE.STEP_ACTIVE_LEARNING: StepActiveLearning, + } diff --git a/icolos/utils/enums/general_utils_enums.py b/icolos/utils/enums/general_utils_enums.py new file mode 100644 index 0000000..1244ee4 --- /dev/null +++ b/icolos/utils/enums/general_utils_enums.py @@ -0,0 +1,15 @@ +class CheckFileGenerationEnum: + + GENERATED_SUCCESS = "generated_success" + GENERATED_EMPTY = "generated_empty" + NOT_GENERATED = "not_generated" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") diff --git a/icolos/utils/enums/input_enums.py b/icolos/utils/enums/input_enums.py new file mode 100644 index 0000000..425f9b8 --- /dev/null +++ b/icolos/utils/enums/input_enums.py @@ -0,0 +1,19 @@ +class InputEnum: + + SOURCE_FIELD_COMPOUNDS = "compounds" + TARGET_FIELD_COMPOUNDS = "compounds" + TARGET_FIELD_CONFORMERS = "conformers" + + # REINVENT-compatible JSON input + JSON_NAMES = "names" + JSON_SMILES = "smiles" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") diff --git a/icolos/utils/enums/logging_enums.py b/icolos/utils/enums/logging_enums.py new file mode 100644 index 0000000..fc26240 --- /dev/null +++ b/icolos/utils/enums/logging_enums.py @@ -0,0 +1,31 @@ +class LoggingConfigEnum: + + # set levels (for now, they match to the "logging" default ones) + DEBUG = "debug" + INFO = "info" + WARNING = "warning" + ERROR = "error" + EXCEPTION = "exception" + + # paths to the configuration JSONs that are shipped with Icolos + PATH_CONFIG_DEFAULT = 
"icolos/config/logging/default.json" + PATH_CONFIG_VERBOSE = "icolos/config/logging/verbose.json" + PATH_CONFIG_DEBUG = "icolos/config/logging/debug.json" + PATH_CONFIG_TUTORIAL = "icolos/config/logging/tutorial.json" + + # high-level loggers defined in the configurations + LOGGER_IO = "io" + LOGGER_STEP = "step" + LOGGER_AGENT = "agent" + LOGGER_ENTRYPOINT = "entrypoint" + LOGGER_BLANK = "blank" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") diff --git a/icolos/utils/enums/parallelization.py b/icolos/utils/enums/parallelization.py new file mode 100644 index 0000000..56022f1 --- /dev/null +++ b/icolos/utils/enums/parallelization.py @@ -0,0 +1,18 @@ +from enum import Enum + + +class ParallelizationEnum(str, Enum): + + STATUS_READY = "ready" + STATUS_SUCCESS = "success" + STATUS_FAILED = "failed" + + # try to find the internal value and return + # def __getattr__(self, name): + # if name in self: + # return name + # raise AttributeError + + # # prohibit any attempt to set any values + # def __setattr__(self, key, value): + # raise ValueError("No changes allowed.") diff --git a/icolos/utils/enums/program_parameters.py b/icolos/utils/enums/program_parameters.py new file mode 100644 index 0000000..b3d6675 --- /dev/null +++ b/icolos/utils/enums/program_parameters.py @@ -0,0 +1,1428 @@ +class TurbomoleEnum: + + # general + # --------- + COORD = "coord" # hard-coded file name of input coordinates + CONTROL = "control" # hard-coded file name for control script + TM_CONFIG_DIR = "tm_config_dir" # directory path where the *.tm configurations lie + # the basename of the parameter set chosen, e.g. "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge" + TM_CONFIG_BASENAME = "tm_config_basename" + # this does not contain the charge or the ending + # full name e.g. 
"b97-3c-ri-d3-def2-mtzvp-int-nosym-charge-1.tm" + TM_CONFIG_ENDING = ".tm" # ending of turbomole configuration files + TM_CONFIG_COSMO = "tm_config_cosmo" # path to the COSMO configuration file + # this needs to be set for each turbomole calculation (current folder) + TM_TURBOTMPDIR = "TURBOTMPDIR" + TM_OUTPUT_COSMOFILE = "mol.cosmo" # hard-coded file name of the turbomole output + TM_OUTPUT_COORDFILE = "coord" + TM_OUTPUT_FINAL_XYZ = "final.xyz" # hard-coded file name for turbomole output + TM_OUTPUT_FINAL_SDF = "final.sdf" + + # the "ridft" binary (+ configuration elements) + # --------- + TM_RIDFT = "ridft" # do DFT calculation with RI-J approximation for + # inter-electronic Coulomb term + TM_RIDFT_FAIL_IDENTIFICATION_STRING = "ridft ended abnormally" + TM_RIDFT_SUCCESS_STRING = ( + "ridft ended normally" # if this string is in stderr, execution was successful + ) + + # the "jobex" binary (+ configuration elements) + # --------- + TM_JOBEX = "jobex" # used for DFT optimization + TM_JOBEX_C = "-c" + TM_JOBEX_GCART = "-gcart" + TM_JOBEX_FAIL_IDENTIFICATION_STRING = "jobex ended abnormally" + TM_JOBEX_SUCCESS_STRING = "jobex ended normally" + + # the "cosmoprep" binary (+ configuration elements) + # --------- + TM_COSMOPREP = "cosmoprep" + TM_COSMOPREP_SUCCESS_STRING = "cosmoprep ended normally" + + # the "define" binary (+ configuration elements) + # --------- + TM_DEFINE = "define" + TM_DEFINE_SUCCESS_STRING = ( + "define ended normally" # if this string is in stderr, execution was successful + ) + + # the "x2t" binary (+ configuration elements) + # --------- + TM_X2T = ( + "x2t" # program to translate an XYZ file to TM input: x2t input.xyz > coord + ) + TM_X2T_SUCCESS_STRING = ( + "$coord" # if this string is in stdout, execution was successful + ) + + # the "t2x" binary (+ configuration elements) + # --------- + # program to translate an TM input to an XYZ file (last snapshot): t2x -c > final.xyz + TM_T2X = "t2x" + TM_T2X_C = "-c" + + # the "cosmotherm" binary (+ configuration elements) + # --------- + CT_COSMOTHERM = "cosmotherm" # the cosmotherm binary + CT_COSMOTHERM_FAIL_STRING = ( + "COSMOtherm ERROR Termination" # if this string is in stderr, the job failed + ) + CT_COSMOTHERM_CONFIG_FILE = ( + "cosmotherm.inp" # hard-coded name of the input file generated before execution + ) + CT_COSMOTHERM_OUTPUT_FILE = ( + "cosmotherm.out" # hard-coded name of the output file generated by cosmotherm + ) + CT_COSMOTHERM_TAB_ENDING = "cosmother.tab" + + CT_CONFIG = "cosmotherm_config" + CT_CONFIG_DEFAULTPATH = "icolos/config/cosmo/default_cosmo.config" + + # control script fields + # --------- + CONTROL_COSMO_OUT = ( + # line, after which insertion is to be put (separate line) + "$cosmo_out file=n" + ) + CONTROL_COSMO_INSERTION = "$cosmo_isorad" + CONTROL_COSMO_REPLACE = ( + "$cosmo_out file=mol.cosmo" # after insertion, replace "$cosmo_out" with this + ) + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class CosmoOutputEnum: + + PATTERN = "pattern" + ELEMENT = "element" + + # general block + # --------- + # this works, because we always set the name of the compound to "mol" + GENERAL_BLOCK_PATTERN_STRING = "--- Compound 1 (mol) ---" + # alternatively, one could search for "Compound 1 ", as we also feed it in in order + + # the key (e.g. 
"E_cosmo") is the tag name for the SDF write-out, "pattern" identifies the line and + # "element" is the number (on the right side, after the ':') of the element (starting with 0) from a + # split using ' ', respectively + GENERAL_BLOCK_ANNOTATIONS = { + "E_cosmo": {"pattern": "E_COSMO+dE", "element": 0}, + "volume": {"pattern": "Volume", "element": 0}, + "area": {"pattern": "Area", "element": 0}, + "dipole": {"pattern": "Dipole moment", "element": 0}, + "HB_acc": {"pattern": "H-bond moment (accept)", "element": 0}, + "HB_don": {"pattern": "H-bond moment (donor)", "element": 0}, + "sigma1": {"pattern": "Sigma moments", "element": 0}, + "sigma2": {"pattern": "Sigma moments", "element": 1}, + "sigma3": {"pattern": "Sigma moments", "element": 2}, + "sigma4": {"pattern": "Sigma moments", "element": 3}, + "sigma5": {"pattern": "Sigma moments", "element": 4}, + "sigma6": {"pattern": "Sigma moments", "element": 5}, + } + + # solvent blocks + # --------- + SOLVENT_BLOCK_PATTERN_STRING = "Gibbs Free Energy of Solvation" + SOLVENT_BLOCK_START_PATTERN = "----------------------" + SOLVENT_BLOCK_BODY_START_PATTERN = "Compound: 1 (mol)" + SOLVENT_TRANSLATE_SOLVENT = { + "h2o": "h2o", + "methanol": "meoh", + "1-octanol": "octanol", + "dimethyls": "dmso", + "cyclohexa": "cychex", + "chcl3": "chcl3", + "acetonitr": "acn", + "thf": "thf", + } + SOLVENT_REPLACEHOLDER = "{solvent}" + SOLVENT_BLOCK_BODY_ANNOTATIONS = { + "Gsolv_{solvent}": {"pattern": "Gibbs Free Energy of Solvation", "element": 0}, + "G_{solvent}": {"pattern": "Free energy of molecule in mix", "element": 0}, + } + SOLVENT_BLOCK_HEADER_COMPOUNDS_PATTERN = "Compound " + SOLVENT_BLOCK_HEADER_MOLFRACTION_PATTERN = "Mole Fraction" + SOLVENT_BLOCK_CURRENT_FRACTION_VALUE = "1.0000" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class FeatureCounterEnum: + + PROPERTY_NUM_RINGS = "num_rings" + PROPERTY_NUM_AROMATIC_RINGS = "num_aromatic_rings" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class CrestEnum: + + # Note: The first argument is usually a coordinate file in TM (coord, Bohr), + # Xmol (*.xyz, Ang.) or SDF format. 
+ # Call: "crest " + # This collection is based on Version 2.10.2, compatible with XTB version 6.1 and later + + # General options (all) + # --------- + CREST = "crest" # binary name + CREST_H = "-h" # print the help message + CREST_HELP_IDENTIFICATION_STRING = "Conformer-Rotamer Ensemble Sampling Tool" + CREST_V3 = "-v3" # version 3 (the default iMTD-GC workflows) + CREST_G = "-g" # 1 string parameter; use GBSA implicit solvent for solvent + CREST_CHRG = "-chrg" # 1 int parameter; the molecule's charge + CREST_UHF = "-uhf" # 1 int parameter; set =Nα-Nβ electrons + CREST_NOZS = ( + "-nozs" # do not perform z-mat sorting, default: z-matrix will be sorted + ) + CREST_ZS = "-zs" # perform z-matrix sorting [default] + # 1 level parameter (vloose, loose, normal, tight, vtight); default: vtight + CREST_OPT = "-opt" + CREST_GFN1 = "-gfn1" # use GFN1-xTB + CREST_GFN2 = "-gfn2" # use GFN2-xTB [default] + CREST_GFF = "-gff" # use GFN-FF (requires xtb 6.3 or newer) + # 1 string parameter; specify name of the xtb binary that should be used + CREST_XNAM = "-xnam" + # 1 float parameter; set energy window in kcl/mol, default: 6.0 kcal/mol + CREST_EWIN = "-ewin" + CREST_RTHR = ( + "-rthr" # 1 float parameter; set RMSD threshold in Ang, default: 0.125 Ang + ) + CREST_ETHR = ( + "-ethr" # 1 float parameter; set E threshold in kcal/mol, default: 0.1 kcal/mol + ) + CREST_BTHR = ( + "-bthr" # 1 float parameter; set Rot. const. threshold, default: 15.0 MHz + ) + # 1 float parameter; Boltzmann population threshold, default: 0.05 (= 5%) + CREST_PTHR = "-pthr" + CREST_EQV = "-eqv" # activate NMR-equivalence printout + CERST_NMR = "-nmr" # activate NMR-mode (= [-eqv] + opt. level: vtight) + CREST_PRSC = "-prsc" # create a scoord.* file for each conformer + CREST_NICEPRINT = "-niceprint" # progress bar printout for optimizations + CREST_DRY = "-dry" # performs a "dry run"; only prints the settings + + # iMTD-GC workflows (selected) + # --------- + CREST_CROSS = "-cross" # do the GC part [default] + CREST_NOCROSS = "-nocross" # don't do the GC part + # 1 int parameter; set SHAKE mode for MD (0=off, 1=H-only, 2=all bonds), default: 2 + CREST_SHAKE = "-shake" + CREST_TSTEP = "-tstep" # 1 int parameter; set MD time step in fs, default: 5 + + # other (selected) + # --------- + CREST_T = ( + # 1 int parameter; set total compound_number of CPUs (threads) to be used + "-T" + ) + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class AutoDockVinaEnum: + # executable "vina" + parameters + # --------- + VINA = "vina" + VINA_CALL = "vina" # the binary call + VINA_HELP = "--help" # display usage summary + VINA_HELP_ADVANCED = "--help_advanced" # display usage summary (with all options) + VINA_VERSION = "--version" # diplay program version + VINA_VERSION_IDENTIFICATION_STRING = ( + "AutoDock Vina 1.1.2" # string, which needs to be present in help output in + ) + # order to assume "AutoDock Vina" can be properly used + VINA_CONFIGURATION = ( + "--config" # path to configuration file, where options below can be put + ) + + # input + VINA_RECEPTOR = "--receptor" # rigid part of the receptor (PDBQT) + VINA_LIGAND = "--ligand" # ligand (PDBQT); only one at a time + VINA_FLEX = "--flex" # flexible side chains, if any (PDBQT) + + # search space + VINA_CENTER_X = "--center_x" # X coordinate of the center + VINA_CENTER_Y = 
"--center_y" # Y coordinate of the center + VINA_CENTER_Z = "--center_z" # Z coordinate of the center + VINA_SIZE_X = "--size_x" # size in the X dimension (Angstroms) + VINA_SIZE_Y = "--size_y" # size in the X dimension (Angstroms) + VINA_SIZE_Z = "--size_z" # size in the X dimension (Angstroms) + + # output + VINA_OUT = "--out" # output models (PDBQT), the default is chosen based on the + # ligand file name + + # advanced options + VINA_SCORE_ONLY = "--score_only" # score only - search space can be omitted + VINA_LOCAL_ONLY = "--local_only" # do local search only + VINA_RANDOMIZE_ONLY = ( + "--randomize_only" # randomize input, attempting to avoid clashes + ) + VINA_WEIGHT_GAUSS1 = "--weight_gauss1" # gauss_1 weight (default: -0.035579) + VINA_WEIGHT_GAUSS2 = "--weight_gauss2" # gauss_2 weight (default: -0.005156) + VINA_WEIGHT_REPULSION = ( + "--weight_repulsion" # repulsion weight (default: 0.84024500000000002) + ) + VINA_WEIGHT_HYDROPHOBIC = ( + "--weight_hydrophobic" # hydrophobic weight (-0.035069000000000003) + ) + VINA_WEIGHT_HYDROGEN = ( + "--weight_hydrogen" # hydrogen bond weight (-0.58743900000000004) + ) + VINA_WEIGHT_ROT = "--weight_rot" # N_rot weight (default: 0.058459999999999998) + + # miscellaneous (optional) + VINA_CPU = "--cpu" # the number of CPUs to use (the default is to try to detect + # the number of CPUs or, failing that, use 1) + VINA_SEED = "--seed" # explicit random seed + VINA_EXHAUSTIVENESS = ( + "--exhaustiveness" # exhaustiveness of the global search (roughly proportional + ) + # to time): 1+ (default: 8) + VINA_NUM_MODES = ( + "--num_modes" # maximum number of binding modes to generate (default: 9) + ) + VINA_ENERGY_RANGE = "--energy_range" # maximum energy difference between the best binding mode and the + # worst one displayed [kcal/mol] (default: 3) + + # --------- + # Vina output specifications + # --------- + ADV_PDBQT = ".pdbqt" + + # the score is part of a tag in the PDBQT -> SDF translated output (tag "REMARK"), which looks like that: + # < REMARK > + # VINA RESULT: -9.1 0.000 0.000 + # Name = /tmp/tmpjssiy8z4.pdb + # ... + + # Note, that the three values are: affinity [kcal/mol] | dist from best mode (rmsd l.b.) | rmsd (u. b.) 
+ REMARK_TAG = "REMARK" + RESULT_LINE_IDENTIFIER = "VINA RESULT" + RESULT_LINE_POS_SCORE = 2 + RESULT_LINE_POS_RMSDTOBEST_LB = 3 + RESULT_LINE_POS_RMSDTOBEST_UB = 4 + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class CrestOutputEnum: + + COORD = "coord" + COORD_ORIGINAL = "coord.original" + CRE_MEMBERS = "cre_members" + CREST_ENERGIES = "crest.energies" + CREST_BEST_XYZ = "crest_best.xyz" + CREST_CONFORMERS_SDF = "crest_conformers.sdf" + CREST_CONFORMERS_XYZ = "crest_conformers.xyz" + CREST_ROTAMERS_XYZ = "crest_rotamers.xyz" + XTBTOPO_MOL2 = "xtbtopo.mol" + + # format properties + PREFIX_ENERGIES_XYZ = " " + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class OpenBabelEnum: + + # executable "obabel" + parameters + # --------- + OBABEL = "obabel" + OBABEL_IDENTIFICATION_STRING = "-O" + OBABEL_INPUTFORMAT_PDBQT = ( + # sets the input format to "PDBQT" (output of "AutoDock Vina") + "-ipdbqt" + ) + OBABEL_INPUTFORMAT_XYZ = ( + "-ixyz" # sets the input format to "XYZ" (format in XTB/TM) + ) + OBABEL_INPUTFORMAT_PDB = "-ipdb" + OBABEL_INPUTFORMAT_SDF = "-isdf" # sets the input format to "SDF" + OBABEL_P = "-p" # sets the value (e.g. "-p 7.4") for protonation + # note, that this overwrites "--addpolarh", which is thus not used + # specifies the output path (directly pasted afterwards, e.g. "-Omypath.pdb") + OBABEL_O = "-O" + OBABEL_OUTPUT_FORMAT_PDBQT = ( + # sets the output format to "PDBQT" (input for "AutoDock Vina") + "-opdbqt" + ) + OBABEL_OUTPUT_FORMAT_SDF = "-osdf" # sets the output format to "SDF" + OBABEL_OUTPUTFORMAT_XYZ = ( + "-oxyz" # sets the output format to "XYZ" (format in XTB/TM) + ) + OBABEL_X = "-x" # specifies generation options + OBABEL_M = "-m" # produce multiple output files + # one of the 'X' options ("-x"), which disables the tree construction of the receptor + # (makes it static), directly pasted together: e.g. 
"-xr" + OBABEL_X_R = "r" + # sets the partial charge generation method (execute "obabel -L charges" to see list of available methods) + OBABEL_PARTIALCHARGE = "--partialcharge" + # one method to compute the partial charges, used as: "--partialcharge gasteiger" + OBABEL_PARTIALCHARGE_GASTEIGER = "gasteiger" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class OpenBabelOutputEnum: + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class OMEGAEnum: + + # executable "oeomega" + parameters; the first parameter is a string indicating the mode + # --------- + OMEGA = "oeomega" + OMEGA_HELP = "--help" # print the help message + OMEGA_HELP_IDENTIFICATION_STRING = "To cite OMEGA please" + OMEGA_MODE_CLASSIC = "classic" # The original customizable omega2 interface + OMEGA_MODE_MACROCYCLE = "macrocycle" # Conformer generation for macrocycles + OMEGA_MODE_ROCS = "rocs" # Optimal conformer generation for ROCS + OMEGA_MODE_POSE = "pose" # Optimal conformer generation for molecular + # alignment and pose prediction by docking + OMEGA_MODE_DENSE = "dense" # Optimal conformer generation for FREEDOM + + # mode "classic" parameters + # --------- + CLASSIC_INPUT = "-in" # Input filename (required, if "-param" not set) + CLASSIC_OUTPUT = "-out" # Output filename (required, if "-param" not set) + CLASSIC_PARAM = "-param" # A parameter file + CLASSIC_PREFIX = "-prefix" # Prefix to use to name output files + CLASSIC_PROGRESS = "-progress" # Method of showing job progress. Either "none", + # "dots", "log" or "percent". + CLASSIC_SDENERGY = "-sdEnergy" # Writes conformer energies to the SD tag field + CLASSIC_VERBOSE = "-verbose" # Triggers copious logging output + # Generate structures from connection-table only. + CLASSIC_FROMCT = "-fromCT" + CLASSIC_EWINDOW = "-ewindow" # Energy window used for conformer selection. + CLASSIC_MAXCONFS = ( + "-maxconfs" # Maximum compound_number of conformations to be saved + ) + CLASSIC_RMS = "-rms" # RMS threshold used to determine duplicate + # conformations + # if set to false ("-canonOrder false"), OMEGA will not update the atom orders + CLASSIC_CANON_ORDER = "-canonOrder" + CLASSIC_STRICTSTEREO = ( + "-strictstereo" # Requires that all chiral atoms and bonds have + ) + # specified stereo + CLASSIC_STRICT = "-strict" # A convenience flag to set "-strictstereo", + # "-strictatomtyping" and "-strictfrags" to true + # or false and override [sic] previous settings. + + # mode "rocs" parameters + # --------- + ROCS_INPUT = "-in" # Input filename (required, if "-param" not set) + ROCS_OUTPUT = "-out" # Output filename (required, if "-param" not set) + ROCS_PARAM = "-param" # A parameter file + ROCS_PREFIX = "-prefix" # Prefix to use to name output files + ROCS_PROGRESS = "-progress" # Method of showing job progress. Either "none", + # "dots", "log" or "percent". 
+ ROCS_VERBOSE = "-verbose" # Triggers copious logging output + + # mode "dense" parameters + # --------- + DENSE_INPUT = "-in" # Input filename (required, if "-param" not set) + DENSE_OUTPUT = "-out" # Output filename (required, if "-param" not set) + DENSE_PARAM = "-param" # A parameter file + DENSE_PREFIX = "-prefix" # Prefix to use to name output files + DENSE_PROGRESS = "-progress" # Method of showing job progress. Either "none", + # "dots", "log" or "percent". + DENSE_VERBOSE = "-verbose" # Triggers copious logging output + + # mode "macrocycle" parameters + # --------- + MACROCYCLE_INPUT = "-in" # Input filename (required, if "-param" not set) + # Output filename (required, if "-param" not set) + MACROCYCLE_OUTPUT = "-out" + MACROCYCLE_PARAM = "-param" # A parameter file + MACROCYCLE_PREFIX = "-prefix" # Prefix to use to name output files + MACROCYCLE_EWINDOW = "-ewindow" # Energy window for the output conformers + MACROCYCLE_ITERATION_CYCLE_SIZE = ( + "-iteration_cycle_size" # Number of iterations to run before checking if a + ) + # new minimum was found (run will finish if no new + # minimum is found). + MACROCYCLE_MAXCONFS = ( + "-maxconfs" # Maximum compound_number of conformations to be saved + ) + MACROCYCLE_MAX_ITERATIONS = ( + # Maximum compound_number of iterations (calculation may + "-max_iterations" + ) + # converge before reaching this compound_number). + MACROCYCLE_REF_TOLERANCE = ( + "-ref_tolerance" # RMS gradient tolerance for force field refinement + ) + MACROCYCLE_RMS = "-rms" # RMS clustering threshold (if 0.0 clustering is + # skipped). + MACROCYCLE_RMSD_DEDUPLICATE = ( + "-rmsd_deduplicate" # Deduplicate using a RMSD calculation (slow) + ) + # rather than energy and torsion comparison + + # mode "pose" parameters + # --------- + POSE_INPUT = "-in" # Input filename (required, if "-param" not set) + POSE_OUTPUT = "-out" # Output filename (required, if "-param" not set) + POSE_PREFIX = "-prefix" # Prefix to use to name output files + POSE_PROGRESS = "-progress" # Method of showing job progress. Either "none", + # "dots", "log" or "percent". + POSE_VERBOSE = "-verbose" # Triggers copious logging output + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class OMEGAOutputEnum: + + # tags + CLASSIC_ENERGY_OUTPUT_TAG = "mmff94smod_NoEstat" + + # other + # This hard-coded output name will be parsed. + OUTPUT_SDF_NAME = "omega_out.sdf" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class XTBEnum: + + # Usage: xtb[options] [options] + # < geometry > may be provided as valid TM coordinate file(*coord in Bohr) or in xmol format(*xyz in Ångström). 
+    # Output Conventions: total energies are given in atomic units (Eh), gaps/HL energies are given in eV
+    XTB = "xtb"
+    XTB_HELP = "--help"
+    XTB_HELP_IDENTIFICATION_STRING = "normal termination of xtb"  # written to stderr
+    XTB_CHRG = "--chrg"  # 1 int parameter; specify molecular charge
+    XTB_UHF = "--uhf"  # 1 int parameter; specify Nalpha-Nbeta
+    # 1 float parameter; accuracy for SCC calculation, lower is better (default = 1.0)
+    XTB_ACC = "--acc"
+    # 1 int parameter; number of iterations in SCC (default = 250)
+    XTB_ITERATION = "--iteration"
+    # 1 level parameter; number of cycles in ANCopt (default = automatic)
+    XTB_CYCLES = "--cycles"
+    XTB_GFN = (
+        # 1 int parameter; specify parametrisation of GFN-xTB (default = 2)
+        "--gfn"
+    )
+    XTB_QMDFF = "--qmdff"  # use QMDFF for single point (needs solvent-file)
+    XTB_TM = "--tm"  # use TURBOMOLE for single point (needs control-file)
+    XTB_ORCA = "--orca"  # use ORCA for single point (writes ORCA input)
+    XTB_MOPAC = "--mopac"  # use MOPAC for single point (writes MOPAC input)
+    # uses periodic boundary conditions (in development)
+    XTB_PERIODIC = "--periodic"
+    # 1 float parameter; electronic temperature (default = 300K)
+    XTB_ETEMP = "--etemp"
+    # 1 level parameter; generalized born (GB) model with solvent accessible surface area (SASA) model
+    XTB_GBSA = "--gbsa"
+    XTB_OPT = "--opt"  # 1 level parameter; either "crude", "sloppy",
+    # "loose", "normal" (default), "tight", "verytight"
+    XTB_P = "-P"  # 1 int parameter; number of cores
+
+    # --vparam FILE      Parameter file for vTB calculation
+    # --xparam FILE      Parameter file for xTB calculation (not used)
+    # --pop              requests printout of Mulliken population analysis
+    # --molden           requests printout of molden file
+    # --dipole           requests dipole printout
+    # --wbo              requests Wiberg bond order printout
+    # --lmo              requests localization of orbitals
+    # --fod              requests FOD calculation, adjusts electronic temperature to 12500 K if possible
+    # --scc, --sp        performs a single point calculation
+    # --vip              performs calculation of ionisation potential
+    # --vea              performs calculation of electron affinity
+    # --vipea            performs calculation of IP and EA
+    # --vomega           performs calculation of electrophilicity index
+    # --vfukui           calculate Fukui indices using GFN-xTB
+    # --esp              calculate electrostatic potential on VdW-grid
+    # --stm              calculate STM image
+    # --grad             performs a gradient calculation
+    # --optts [LEVEL] [ROOT]  call ancopt(3) to perform a transition state optimization, may
+    #                    need to perform a hessian calculation first
+    # --hess             perform a numerical hessian calculation on input geometry
+    # --ohess [LEVEL]    perform a numerical hessian calculation on an ancopt(3) optimized geometry
+    # --md               molecular dynamics simulation on start geometry
+    # --omd              molecular dynamics simulation on ancopt(3) optimized geometry, a loose
+    #                    optimization level will be chosen.
+    # --metadyn [INT]    meta dynamics simulation on start geometry saving INT snapshots to bias the simulation
+    # --siman            conformational search by simulated annealing based on molecular dynamics.
+    #                    Conformers are optimized with ancopt.
+    # --modef INT        modefollowing algorithm. INT specifies the mode that should be used for the modefollowing.
+    # -I,--input FILE    use FILE as input source for xcontrol(7) instructions
+    # --namespace STRING give this xtb(1) run a namespace. All files, even temporary ones, will
+    #                    be named accordingly (might not work everywhere).
+ # --[no]copy copies the xcontrol file at startup (default = true) + # --[no]restart restarts calculation from xtbrestart (default = true) + # -P,--parallel INT compound_number of parallel processes + # --define performs automatic check of input and terminate + # --version print version and terminate + # --citation print citation and terminate + # --license print license and terminate + # -v,--verbose be more verbose (not supported in every unit) + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class XTBOutputEnum: + + XTBOPT_SDF = "xtbopt.sdf" + XTBTOPO_SDF = "xtbtopo.sdf" + XTBOPT_LOG = "xtbopt.log" + XTBRESTART = "xtbrestart" + WBO = "wbo" + CHARGES = "charges" + SUCCESS = "success" + FAILURE = "failure" + + # tags + TOTAL_ENERGY_TAG = "total energy / Eh" + GRADIENT_TAG = "gradient norm / Eh/a0" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class MacromodelEnum: + + MACROMODEL = "macromodel" + MACROMODEL_HELP = "-h" + MACROMODEL_HELP_IDENTIFICATION_STRING = "MacroModel Startup Script" + MACROMODEL_NJOBS = "-NJOBS" + MACROMODEL_WAIT = "-WAIT" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class ModelBuilderEnum: + + # OPTBUILD parameters + OPTBUILD_ENTRY_POINT = "optbuild.py" + CONFIG = "--config" + BEST_BUILDCONFIG_OUTPATH = ( + "--best-buildconfig-outpath" # path to the output JSON for the best trial + ) + BEST_MODEL_OUTPATH = ( + # path to the output model (PKL) for the best trial + "--best-model-outpath" + ) + MERGED_MODEL_OUTPATH = ( + "--merged-model-outpath" # path to the production output model (PKL) + ) + PERSISTENCE_MODE = "--model-persistence-mode" + PERSISTENCE_MODE_PLAINSKLEARN = "plain_sklearn" + PERSISTENCE_MODE_SKLEARNWITHOPTUNAAZ = "sklearn_with_optunaz" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class LigprepEnum: + + LIGPREP = "ligprep" + LIGPREP_HELP = "-h" + LIGPREP_HELP_IDENTIFICATION_STRING = "usage: ligprep [options]" + # SMI input followed by (alternatives: "-icsv", "-imae" and "-isd") + LIGPREP_INPUT_ISMI = "-ismi" + LIGPREP_OUTPUT_OSD = ( + "-osd" # SD(F) output followed by (alternative: "-omae") + ) + # not used in AZdock, but would be an option to feed parameters from configuration file + LIGPREP_INP_CONFIG = "-inp" + LIGPREP_EPIK = ( + "-epik" # Use "Epik" for ionization and tautomerization (Recommended) + ) + LIGPREP_PH = ( + # Effective / target pH; followed by (use 7.0 as default) + "-ph" + ) + # pH tolerance for generated structures; followed by (use 2.0 as default) + LIGPREP_PHT = "-pht" + LIGPREP_AC = ( + "-ac" # Do not respect existing chirality properties and do not respect + ) + # chiralities from the input geometry. 
Generate stereoisomers for all chiral centers up to + # the number permitted (specified using the -s option). This is equivalent to "Generate + # all combinations" in the Ligand Preparation user interface. Default + # behavior is to respect only explicitly indicated chiralities. + # Filter structures via LigFilter using specifications from the file provided. Default: do not filter. + LIGPREP_F = "-f" + LIGPREP_G = ( + # Respect chiralities from input geometry when generating stereoisomers. + "-g" + ) + # Generate up to this stereoisomers per input structure. (Default: 32). + LIGPREP_S = "-s" + # Force-field to be used for the final geometry optimization (either 14 or 16, which refers to OPLS_2005 and + LIGPREP_BFF = "-bff" + # OPLS3e respectively. Default: 14 + LIGPREP_FF_OPLS_2005 = "14" # Default force-field + LIGPREP_FF_OPLS3e = "16" # Alternative force-field + LIGPREP_NJOBS = ( + # Divide the overall job into NJOBS subjobs. Set to 1 by default. + "-NJOBS" + ) + # Divide the overall job into subjobs with no more than NSTRUCTS structures. Set to 1 by default. + LIGPREP_NSTRUCTS = "-NSTRUCTS" + LIGPREP_HOST = ( + # Run the job on remotely on the indicated host entry. + "-HOST" + ) + LIGPREP_HOST_LOCALHOST = "localhost" # Default value for the run. + LIGPREP_WAIT = "-WAIT" # Do not return a prompt until the job completes. + + LIGPREP_LOG_ENDING = ".log" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class GlideEnum: + # executable "glide" + parameters + # note, that you can get the full list of parameters with "$SCHRODINGER/glide -k" + # --------- + GLIDE = "glide" + GLIDE_CALL = "$SCHRODINGER/glide" + GLIDE_HELP = "-h" + GLIDE_HELP_IDENTIFICATION_STRING = "positional arguments:" + GLIDE_WAIT = "-WAIT" + GLIDE_OVERWRITE = "-OVERWRITE" # Remove previous job files before running. + GLIDE_NJOBS = "-NJOBS" # Divide the overall job into NJOBS subjobs. + GLIDE_HOST = "-HOST" # Run job remotely on the indicated host entry. + # WARNING: does not seem to be supported (any longer?) - probably "-NOLOCAL" now? + GLIDE_TMPLAUNCHDIR = "-TMPLAUNCHDIR" + # WARNING: does not seem to be supported (any longer?) + GLIDE_ATTACHED = "-ATTACHED" + # amide bond rotation behavior: "fixed", "free", "penal", "trans", "gen[eralized]" + GLIDE_AMIDE_MODE = "AMIDE_MODE" + # bypass elimination of poses in rough scoring stage (useful for fragment docking) + GLIDE_EXPANDED_SAMPLING = "EXPANDED_SAMPLING" + GLIDE_GRIDFILE = "GRIDFILE" # path to grid (.grd or .zip) file + GLIDE_LIGANDFILE = "LIGANDFILE" # Glide docking ligands file name + # expand size of the Glide funnel by N times to process poses from N confgen runs with minor + # perturbations to the input ligand coordinates + GLIDE_NENHANCED_SAMPLING = "NENHANCED_SAMPLING" + # format for file containing docked poses: "poseviewer" for _pv.mae output; "ligandlib" for + # _lib.mae; similarly "poseviewer_sd" and "ligandlib_sd" for sdf output; "phase_subset" for bypassing + # _lib or _pv in favor of a Phase subset file. 
+ GLIDE_POSE_OUTTYPE = "POSE_OUTTYPE" + GLIDE_POSE_OUTTYPE_LIGANDLIB = ( + "ligandlib_sd" # sets the write-out to SDF (easily parsed) + ) + # uses the poseviewer (MAE format) write-out; contains the receptor + GLIDE_POSE_OUTTYPE_POSEVIEWER = "poseviewer" + GLIDE_POSES_PER_LIG = ( + "POSES_PER_LIG" # maximum number of poses to report per each input ligand + ) + # maximum number of best-by-Emodel poses to submit to post-docking minimization + GLIDE_POSTDOCK_NPOSE = "POSTDOCK_NPOSE" + GLIDE_POSTDOCKSTRAIN = ( + "POSTDOCKSTRAIN" # include strain correction in post-docking score + ) + # glide docking precision ("SP", "XP" or "HTVS") + GLIDE_PRECISION = "PRECISION" + # reward formation of intramolecular hydrogen bonds in the ligand + GLIDE_REWARD_INTRA_HBONDS = "REWARD_INTRA_HBONDS" + GLIDE_USE_CONS = "USE_CONS" + GLIDE_NREQUIRED_CONS = "NREQUIRED_CONS" + # if any of these string is present in the logfile associated with a subjob, all went well + GLIDE_LOG_SUCCESS_STRING = "glide_sort command succeeded" + GLIDE_LOG_FINISHED_STRINGS = {"Exiting Glide"} + GLIDE_LOG_FAIL_STRINGS = { + "*** Error in", + # if any of these strings is present in the logfile associated with a subjob, there was an + # issue resulting in the complete failure of the execution + "Glide cannot recover from this signal and will now abort.", + "======= Backtrace: =========", + } + + # "Glide: FATAL mmlewis error"} + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class SchrodingerExecutablesEnum: + + # executable "licadmin" + parameters + # --------- + LICADMIN = "licadmin" + LICADMIN_STAT = "STAT" # returns the list of tokens used / available + + # executable "sdconvert" + parameters + # --------- + SCHRODINGER_MODULE = "module load schrodinger/2021-2-js-aws" + SDCONVERT = "sdconvert" + SDCONVERT_CALL = "$SCHRODINGER/utilities/sdconvert" + SDCONVERT_HELP = "" + SDCONVERT_HELP_IDENTIFICATION_STRING = "mae : Maestro format" + SDCONVERT_A = "-a" # append structures to the output file + # input; note that the format is directly appended (e.g. "-isd") + SDCONVERT_I = "-i" + SDCONVERT_O = ( + # output; note that the format is directly appended (e.g. "-omae") + "-o" + ) + SDCONVERT_FORMAT_SD = "sd" # MDL SDfile format + SDCONVERT_FORMAT_PDB = "pdb" # PDB file format + SDCONVERT_FORMAT_MM = "mm" # MacroModel (.dat) format + SDCONVERT_FORMAT_MAE = "mae" # Maestro format + SDCONVERT_TITLE = ( + "-title" # define SD property as the source of the Maestro title + ) + SDCONVERT_NOSTEREO = ( + "-nostereo" # do not record the atom parity info from the input file + ) + # do not convert aromatic type 4 bonds to single and double bonds (which is the Maestro convention) + SDCONVERT_NOAROM = "-noarom" + + # executable "structcat" + parameters + STRUCTCAT = "structcat" + STRUCT_SPLIT_CALL = "$SCHRODINGER/run split_structure.py" + STRUCT_SPLIT = "structsplit" + STRUCTCONVERT = "structconvert" + STRUCTCAT_CALL = "$SCHRODINGER/utilities/structcat" + STRUCTCONVERT_CALL = "$SCHRODINGER/utilities/structconvert" + FMP_STATS = "fmp_stats" + FMP_STATS_CALL = "$SCHRODINGER/run -FROM scisol fmp_stats.py" + STRUCTCAT_HELP = "-h" + STRUCTCAT_HELP_IDENTIFICATION_STRING = " must be one of" + # input; note that the format is directly appended (e.g. "-isd") + STRUCTCAT_I = "-i" + STRUCTCAT_O = ( + # output; note that the format is directly appended (e.g. 
"-omae") + "-o" + ) + STRUCTCAT_FORMAT_MAE = "mae" # Maestro format + STRUCTCAT_FORMAT_SD = "sd" # MDL SDfile format + STRUCTCAT_FORMAT_SDF = "sdf" + STRUCTCAT_FORMAT_PDB = "pdb" # PDB format + STRUCTCAT_FORMAT_MOL2 = "mol2" # sybyl (.mol2) format + POSEVIEWER_FILE_KEY = "pv.maegz" + PREPWIZARD = "prepwizard" + PREPWIZARD_CALL = "$SCHRODINGER/utilities/prepwizard" + MULTISIM_EXEC = "$SCHRODINGER/utilities/multisim" + AWS_BINARY_LOC = ( + "ssh /opt/schrodinger/suite/installations/default/" + ) + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class PrimeEnum: + + PRIME_MMGBSA = "prime_mmgbsa" + PRIME_HELP = "-h" + PRIME_HELP_IDENTIFICATION_STRING = ( + "run $SCHRODINGER/prime_mmgbsa -h for a complete listing of all options." + ) + PRIME_NJOBS = "-NJOBS" + PRIME_WAIT = "-WAIT" + + # settings + PRIME_OUTTYPE = "-out_type" + PRIME_OUTTYPE_LIGAND = "LIGAND" + + # tags in output + PRIME_MMGBSA_TOTAL_ENERGY = ( + "r_psp_MMGBSA_dG_Bind" # total energy of binding: complex - receptor - ligand + ) + PRIME_MMGBSA_TOTAL_ENERGY_NS = ( + # as above but without strain energy correction + "r_psp_MMGBSA_dG_Bind(NS)" + ) + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class PantherEnum: + + PANTHER_PTYHON2 = "python2" + PANTHER_ENTRYPOINT = "panther.py" + PANTHER_CONFIG = "panther_config.in" + PANTHER_OUTPUT_FILE = "neg_image.mol2" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class ShaepEnum: + + SHAEP_EXECUTABLE = "shaep" + OUTPUT_SIMILARITY = "similarity.txt" + NEGATIVE_IMAGE_OUTPUT_FILE = "neg_image.mol2" + CONFORMER_PATH = "conformer.sdf" + TAG_SHAPE_SIMILARITY = "shape_similarity" + TAG_ESP_SIMILARITY = "esp_similarity" + + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class GromacsEnum: + + # gmx programs + PDB2GMX = "gmx pdb2gmx" + EDITCONF = "gmx editconf" + SOLVATE = "gmx solvate" + GROMPP = "gmx grompp" + GENION = "gmx genion" + MDRUN = "gmx mdrun" + MAKE_NDX = "gmx make_ndx" + GENRESTR = "gmx genrestr" + TRJCAT = "gmx trjcat" + TRJCONV = "gmx trjconv" + CLUSTER = "gmx cluster" + RMS = "gmx rms" + ANTECHAMBER = "antechamber" + PDB2GMX_FAIL_ID_STRING = "Required option was not provided" + PYTHON = "python3" + ACPYPE_BINARY = "acpype.py" + MMPBSA = "gmx_MMPBSA" + DO_DSSP = "gmx do_dssp" + ACPYPE_PATH = "$ACPYPE" + CLUSTER_TS = "Rscript $MDPLOT/MDplot/inst/bash/MDplot_bash.R clusters_ts" + PRIMARY_COMPONENTS = ["Protein", "DNA", "RNA"] + # from residuetypes.dat + AMBER_PARAMETRISED_COMPONENTS = [ + "ABU", + "ACE", + "AIB", + "ALA", + "ARG", + "ARGN", + "ASN", + "ASN1", + "ASP", + "ASP1", + "ASPH", + "ASPP", + "ASH", + "CT3", + "CYS", + "CYS1", + "CYS2", + "CYSH", + "DALA", + "GLN", + "GLU", + "GLUH", + "GLUP", + "GLH", + "GLY", + "HIS", + "HIS1", + "HISA", + "HISB", + "HISH", + "HISD", + "HISE", + 
"HISP", + "HSD", + "HSE", + "HSP", + "HYP", + "ILE", + "LEU", + "LSN", + "LYS", + "LYSH", + "MELEU", + "MET", + "MEVAL", + "NAC", + "NME", + "NHE", + "NH2", + "PHE", + "PHEH", + "PHEU", + "PHL", + "PRO", + "SER", + "THR", + "TRP", + "TRPH", + "TRPU", + "TYR", + "TYRH", + "TYRU", + "VAL", + "PGLU", + "HID", + "HIE", + "HIP", + "LYP", + "LYN", + "CYN", + "CYM", + "CYX", + "DAB", + "ORN", + "HYP", + "NALA", + "NGLY", + "NSER", + "NTHR", + "NLEU", + "NILE", + "NVAL", + "NASN", + "NGLN", + "NARG", + "NHID", + "NHIE", + "NHIP", + "NHISD", + "NHISE", + "NHISH", + "NTRP", + "NPHE", + "NTYR", + "NGLU", + "NASP", + "NLYS", + "NORN", + "NDAB", + "NLYSN", + "NPRO", + "NHYP", + "NCYS", + "NCYS2", + "NMET", + "NASPH", + "NGLUH", + "CALA", + "CGLY", + "CSER", + "CTHR", + "CLEU", + "CILE", + "CVAL", + "CASN", + "CGLN", + "CARG", + "CHID", + "CHIE", + "CHIP", + "CHISD", + "CHISE", + "CHISH", + "CTRP", + "CPHE", + "CTYR", + "CGLU", + "CASP", + "CLYS", + "CORN", + "CDAB", + "CLYSN", + "CPRO", + "CHYP", + "CCYS", + "CCYS2", + "CMET", + "CASPH", + "CGLUH", + "DA", + "DG", + "DC", + "DT", + "DA5", + "DG5", + "DC5", + "DT5", + "DA3", + "DG3", + "DC3", + "DT3", + "DAN", + "DGN", + "DCN", + "DTN", + "A", + "U", + "C", + "G", + "RA", + "RU", + "RC", + "RG", + "RA5", + "RT5", + "RU5", + "RC5", + "RG5", + "RA3", + "RT3", + "RU3", + "RC3", + "RG3", + "RAN", + "RTN", + "RUN", + "RCN", + "RGN", + "SOL", + "WAT", + "HOH", + "OHH", + "TIP", + "T3P", + "T4P", + "T5P", + "T3H", + "K", + "NA", + "CA", + "MG", + "CL", + "ZN", + "CU1", + "CU", + "LI", + "NA+", + "RB", + "CS", + "F", + "CL-", + "BR", + "I", + "OH", + "Cal", + "IB+", + ] + IONS = ["ZN", "MG", "CU", "CA", "NA", "CL", "RB", "CS", "F", "BR", "I", "OH", "K"] + LIG_ID = "lig_id.lig" + LIG_EXT = "lig" + ATOMS = ["HETATM", "ATOM"] + ATOMTYPES = "[ atomtypes ]" + MOLECULETYPES = "[ moleculetype ]" + MOLECULES = "[ molecules ]\n" + SOLVENTS = ["HOH ", "SOL", "WAT"] + TERMINATIONS = ["ENDMDL", "END"] + + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class PMXEnum: + + # $PMX programs (see respective steps for the help strings) + ABFE = "$PMX abfe" + ANALYSE = "$PMX analyse" + ATOMMAPPING = "$PMX atomMapping" + DOUBLEBOX = "$PMX doublebox" + GENLIB = "$PMX genlib" + GENTOP = "$PMX gentop" + LIGANDHYBRID = "$PMX ligandHybrid" + MUTATE = "$PMX mutate" + + # custom scripts + BOX_WATER_IONS = "box_water_ions.py" + PREPARE_SIMULATIONS = "prepare_simulations.py" + PREPARE_TRANSITIONS = "prepare_transitions.py" + RUN_ANALYSIS = "run_analysis.py" + RUN_SIMULATIONS = "run_simulations.py" + ASSEMBLE_SYSTEMS = "assemble_systems.py" + + ANALYSE_HELP = "-h" + ANALYSE_HELP_SUCCESS_STRING = "Calculates free energies from fast" + + # standard file extensions + PDB = "pdb" + + # file system standards + HYBRID_STR_TOP = "hybridStrTop" + + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class PMXAtomMappingEnum: + + HELP = "--help" # show this help message and exit. <> + I1 = "-i1" # Input ligand structure 1. Default is "lig1.pdb" <1 pdb file> + I2 = "-i2" # Input ligand structure 2. Default is "lig2.pdb" <1 pdb file> + O1 = "-o1" # Output pairs: column1:mol1, column2:mol2. 
Default is "pairs1.dat" <1 dat file> + O2 = "-o2" # Output pairs: column1:mol2, column2:mol1. Default is "pairs2.dat" <1 dat file> + OPDB1 = "-opdb1" # Optional output: superimposed structure 1. <1 pdb file> + OPDB2 = "-opdb2" # Optional output: superimposed structure 2. <1 pdb file> + OPDBM1 = "-opdbm1" # Optional output: morphable atoms in structure 1 <2 pdb files> + OPDBM2 = "-opdbm2" # Optional output: morphable atoms in structure 2. <2 pdb files> + # Optional output: score of the morph. Default is "out_score.dat" <1 dat file> + SCORE = "-score" + N1 = "-n1" # Optional input: index of atoms to consider for mol1 <1 ndx file> + N2 = "-n2" # Optional input: index of atoms to consider for mol2 <1 ndx file> + LOG = "-log" # Output: log file. Default is "mapping.log" <1 log file> + NO_ALIGNMENT = ( + # Should the alignment method be disabled (default enabled) <> + "--no-alignment" + ) + # Should the MCS method be disabled (default enabled) <> + NO_MCS = "--no-mcs" + # Should non-polar hydrogens be discarded from morphing into any other hydrogen (default True) <> + NO_H2H = "--no-H2H" + # Should polar hydrogens be morphed into polar hydrogens (default False) <> + H2HPOLAR = "--H2Hpolar" + H2HEAVY = ( + # Should hydrogen be morphed into a heavy atom (default False) <> + "--H2Heavy" + ) + # Should rings only be used in the MCS search and alignemnt (default False) <> + RINGSONLY = "--RingsOnly" + # Should the distance criterium be also applied in the MCS based search (default False) <> + DMCS = "--dMCS" + # Try swapping the molecule order which would be a cross-check and require double execution time (default False) <> + SWAP = "--swap" + NO_CHIRALITY = ( + # Perform chirality check for MCS mapping (default True) <> + "--no-chirality" + ) + # Distance (nm) between atoms to consider them morphable for alignment approach (default 0.05 nm). <1 numeric value> + D = "--d" + # Maximum time (s) for an MCS search (default 10 s). <1 numeric value> + TIMEOUT = "--timeout" + + LIGAND_DIR = "input/ligands" + + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class PMXLigandHybridEnum: + + HELP = "--help" # Show this help message and exit. <> + I1 = "-i1" # Input ligand structure 1. Default is "lig1.pdb" <1 pdb file> + I2 = "-i2" # Input ligand structure 2. Default is "lig2.pdb" <1 pdb file> + ITP1 = "-itp1" # Input ligand topology 1. Default is "lig1.itp" <1 itp file> + ITP2 = "-itp2" # Input ligand topology 2. Default is "lig2.itp" <1 itp file> + PAIRS = "-pairs" # Optional input: atom pair mapping. <1 dat file> + N1 = "-n1" # Optional input: index of atoms to consider for mol1. <1 ndx file> + N2 = "-n2" # Optional input: index of atoms to consider for mol2. <1 ndx file> + OA = "-oA" # Output: hybrid structure based on the ligand 1. Default is "mergedA.pdb" <1 pdb file> + OB = "-oB" # Output: hybrid structure based on the ligand 2. Default is "mergedB.pdb" <1 pdb file> + OITP = "-oitp" # Output: hybrid topology. Default is "merged.itp". <1 itp file> + # Output: atomtypes for hybrid topology. Default is "ffmerged.itp" <1 itp file> + OFFITP = "-offitp" + LOG = "-log" # Output: log file. Default is "hybrid.log" <1 log file> + # Optional: if -pairs not provided, distance (nm) between atoms to consider them morphable + # for alignment approach (default 0.05 nm). 
<1 numerical value> + D = "--d" + FIT = "--fit" # Fit mol2 onto mol1, only works if pairs.dat is provided. <> + SPLIT = "--split" # Split the topology into separate transitions. + SCDUMM = ( + "--scDUMm" # Scale dummy masses using the counterpart atoms. <1 numeric value> + ) + SCDUMA = "--scDUMa" # Scale bonded dummy angle parameters. <1 numeric value> + SCDUMD = "--scDUMd" # Scale bonded dummy dihedral parameters. <1 numeric value> + DEANG = "--deAng" # Decouple angles composed of 1 dummy and 2 non-dummies. + + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class FepPlusEnum: + FEP_MAPPER = "$SCHRODINGER/run -FROM scisol fep_mapper.py" + FEP_EXECUTOR = "$SCHRODINGER/fep_plus" + FEP_HELP = "-h" + JSC_LIST = 'ssh "export SCHRODINGER=/opt/schrodinger/suite/installations/default && /opt/schrodinger/suite/installations/default/jsc list"' + JSC_TAIL_FILE = 'ssh "export SCHRODINGER=/opt/schrodinger/suite/installations/default && /opt/schrodinger/suite/installations/default/jsc tail-file' + DICT = "dict" + PATH = "path" + FEP_MAPPER_HELP_SUCCESS_STRING = "If given, the match will be allowed" + + +class PdbFixerEnum: + FIXER = "pdbfixer" + + +class DSSPEnum: + MKDSSP = "mkdssp" + + +class SlurmEnum: + SBATCH = "sbatch" + STATE = "State" + COMPLETED = "COMPLETED" + RUNNING = "RUNNING" + PENDING = "PENDING" + FAILED = "FAILED" diff --git a/icolos/utils/enums/step_enums.py b/icolos/utils/enums/step_enums.py new file mode 100644 index 0000000..61ecc44 --- /dev/null +++ b/icolos/utils/enums/step_enums.py @@ -0,0 +1,922 @@ +from enum import Enum + + +class StepBaseEnum(str, Enum): + # general settings + STEPID = "step_id" + + # different step types + STEP_TYPE = "type" + STEP_CREST = "CREST" + STEP_OMEGA = "OMEGA" + STEP_XTB = "XTB" + STEP_MACROMODEL = "MACROMODEL" + STEP_TURBOMOLE = "TURBOMOLE" + STEP_COSMO = "COSMO" + STEP_INITIALIZATION = "INITIALIZATION" + STEP_EMBEDDING = "EMBEDDING" + STEP_PREDICTION = "PREDICTION" + STEP_MODEL_BUILDING = "MODEL_BUILDING" + STEP_BOLTZMANN_WEIGHTING = "BOLTZMANN_WEIGHTING" + STEP_PKA_PREDICTION = "PKA_PREDICTION" + STEP_PRIME = "PRIME" + STEP_CLUSTERING = "CLUSTERING" + STEP_RMSD = "RMSD" + STEP_RMSFILTER = "RMSFILTER" + STEP_DATA_MANIPULATION = "DATA_MANIPULATION" + STEP_DESMOND = "DESMOND" + STEP_DESMOND_SETUP = "DESMOND_SETUP" + STEP_FILTER = "FILTER" + STEP_PANTHER = "PANTHER" + STEP_SHAEP = "SHAEP" + STEP_PDB2GMX = "PDB2GMX" + STEP_PDB2GMX_LIG = "PDB2GMX_LIG" + STEP_EDITCONF = "EDITCONF" + STEP_SOLVATE = "SOLVATE" + STEP_GENION = "GENION" + STEP_GROMPP = "GROMPP" + STEP_MDRUN = "MDRUN" + STEP_FEATURE_COUNTER = "FEATURE_COUNTER" + STEP_TRJCONV = "TRJCONV" + STEP_TRJCAT = "TRJCAT" + STEP_GMX_RMSD = "GMX_RMSD" + STEP_CLUSTER = "CLUSTER" + STEP_DO_DSSP = "DO_DSSP" + STEP_LIGPREP = "LIGPREP" + STEP_GLIDE = "GLIDE" + STEP_AUTODOCKVINA_DOCKING = "VINA_DOCKING" + STEP_AUTODOCKVINA_TARGET_PREPARATION = "VINA_TARGET_PREPARATION" + STEP_FEP_PLUS_SETUP = "FEP_PLUS_SETUP" + STEP_FEP_PLUS_EXEC = "FEP_PLUS_EXEC" + STEP_FEP_PLUS_ANALYSIS = "FEP_PLUS_ANALYSIS" + STEP_PREPWIZARD = "PREPWIZARD" + STEP_MDPOCKET = "MDPOCKET" + STEP_PDB_FIXER = "PDB_FIXER" + STEP_PEPTIDE_EMBEDDER = "PEPTIDE_EMBEDDER" + STEP_GMX_MMPBSA = "GMX_MMPBSA" + + # PMX SCRIPTS + STEP_PMX_ABFE = "PMX_ABFE" + STEP_PMX_ANALYSE = "PMX_ANALYSE" + STEP_PMX_ATOMMAPPING = "PMX_ATOMMAPPING" + STEP_PMX_DOUBLEBOX = "PMX_DOUBLEBOX" + STEP_PMX_GENLIB = 
"PMX_GENLIB" + STEP_PMX_GENTOP = "PMX_GENTOP" + STEP_PMX_LIGANDHYBRID = "PMX_LIGANDHYBRID" + STEP_PMX_MUTATE = "PMX_MUTATE" + STEP_PMX_SETUP = "PMX_SETUP" + STEP_PMX_PREPARE_SIMULATIONS = "PMX_PREPARE_SIMULATIONS" + STEP_PMX_BOX_WATER_IONS = "PMX_BOX_WATER_IONS" + STEP_PMX_PREPARE_TRANSITIONS = "PMX_PREPARE_TRANSITIONS" + STEP_PMX_RUN_SIMULATIONS = "PMX_RUN_SIMULATIONS" + STEP_PMX_ASSEMBLE_SYSTEMS = "PMX_ASSEMBLE_SYSTEMS" + STEP_PMX_RUN_ANALYSIS = "PMX_RUN_ANALYSIS" + + STEP_DSSP = "DSSP" + STEP_TS_CLUSTER = "TS_CLUSTER" + STEP_ESP_SIM = "ESP_SIM" + STEP_JOB_CONTROL = "JOB_CONTROL" + STEP_ACTIVE_LEARNING = "ACTIVE_LEARNING" + + # flow control blocks + STEP_ITERATOR = "ITERATOR" + + # execution + EXEC = "execution" + EXEC_PREFIXEXECUTION = "prefix_execution" + EXEC_BINARYLOCATION = "binary_location" + EXEC_PARALLELIZATION = "parallelization" + EXEC_PARALLELIZATION_CORES = "cores" + EXEC_PARALLELIZATION_MAXLENSUBLIST = "max_length_sublists" + EXEC_FAILUREPOLICY = "failure_policy" + EXEC_FAILUREPOLICY_NTRIES = "n_tries" + EXEC_RESOURCE = "resource" + EXEC_JOB_CONTROL = "job_control" + EXEC_JOB_CONTROL_PARTITION = "partition" + EXEC_JOB_CONTROL_GRES = "gres" + EXEC_JOB_CONTROL_MODULES = "modules" + EXEC_JOB_CONTROL_MEM = "mem" + EXEC_JOB_CONTROL_CORES = "cores" + EXEC_JOB_CONTROL_OTHER_ARGS = "other_args" + + # settings + SETTINGS = "settings" + SETTINGS_ARGUMENTS = "arguments" + SETTINGS_ARGUMENTS_FLAGS = "flags" + SETTINGS_ARGUMENTS_PARAMETERS = "parameters" + SETTINGS_ADDITIONAL = "additional" + + PIPE_INPUT = "pipe_input" + + # annotation: fixed strings + ANNOTATION_TAG_DOCKING_SCORE = "docking_score" + ANNOTATION_TAG_G_SCORE = "g_score" + + ANNOTATION_GRID_ID = "grid_id" + ANNOTATION_GRID_PATH = "grid_path" + ANNOTATION_GRID_FILENAME = "grid_filename" + + GRID_IDS = "grid_ids" # enforces given list of IDs rather than indices + + # I/O and "hand-over" + # --------- + FORMAT_SDF = "SDF" + FORMAT_CSV = "CSV" + FORMAT_SMI = "SMI" + FORMAT_MOL2 = "MOL2" + FORMAT_XTB = "XTB" + FORMAT_PDB = "PDB" + FORMAT_PKL = "PKL" + FORMAT_SMILES = "SMILES" + FORMAT_PLAIN = "PLAIN" + FORMAT_TXT = "TXT" + FORMAT_JSON = "JSON" + FORMAT_DTR = "DTR" + FORMAT_CMS = "CMS" + + INPUT = "input" + INPUT_FIELD = "field" + INPUT_SOURCES = "sources" + INPUT_COMPOUNDS = "compounds" + INPUT_ENUMERATIONS = "enumerations" + INPUT_EXTENSION = "extension" + INPUT_SOURCE = "source" + INPUT_GENERIC = "generic" + INPUT_FORMAT = "format" + INPUT_SOURCE_TYPE = "source_type" + INPUT_SOURCE_TYPE_FILE = "file" + INPUT_SOURCE_TYPE_DIR = "dir" + INPUT_SOURCE_TYPE_PATH = "path" + INPUT_SOURCE_TYPE_STEP = "step" + INPUT_SOURCE_TYPE_STRING = "string" + INPUT_SOURCE_TYPE_URL = "url" + + INPUT_ENFORCE_IDS = "enforce_ids" + INPUT_ENFORCE_COMPOUND_IDS = "compound_ids" + INPUT_ENFORCE_ENUMERATION_IDS = "enumeration_ids" + + INPUT_MERGE = "merge" + INPUT_MERGE_COMPOUNDS = "compounds" + INPUT_MERGE_COMPOUNDS_BY = "merge_compounds_by" + INPUT_MERGE_ENUMERATIONS = "enumerations" + INPUT_MERGE_ENUMERATIONS_BY = "merge_enumerations_by" + INPUT_MERGE_BY_NAME = "name" + INPUT_MERGE_BY_ID = "id" + INPUT_MERGE_BY_SMILE = "smile" + + FILE_TYPE_PDB = "pdb" + FILE_SIZE_THRESHOLD = 2e9 + + # CSV settings + INPUT_CSV_DELIMITER = "delimiter" + INPUT_CSV_COLUMNS = "columns" + INPUT_CSV_SMILES_COLUMN = "smiles" + INPUT_CSV_NAMES_COLUMN = "names" + + # write-out settings + WRITEOUT = "writeout" + WRITEOUT_CONFIG = "config" + + WRITEOUT_COMP = "compounds" + WRITEOUT_COMP_CATEGORY = "category" + WRITEOUT_COMP_CATEGORY_CONFORMERS = "conformers" + 
WRITEOUT_COMP_CATEGORY_ENUMERATIONS = "enumerations"
+ WRITEOUT_COMP_CATEGORY_EXTRADATA = "extra_data"
+ WRITEOUT_COMP_KEY = "key"
+ WRITEOUT_COMP_AGGREGATION = "aggregation"
+ WRITEOUT_COMP_AGGREGATION_MODE = "mode"
+ WRITEOUT_COMP_AGGREGATION_MODE_ALL = "all"
+ WRITEOUT_COMP_AGGREGATION_MODE_BESTPERCOMPOUND = "best_per_compound"
+ WRITEOUT_COMP_AGGREGATION_MODE_BESTPERENUMERATION = "best_per_enumeration"
+ WRITEOUT_COMP_AGGREGATION_HIGHESTISBEST = "highest_is_best"
+ WRITEOUT_COMP_AGGREGATION_KEY = "key"
+ WRITEOUT_COMP_SELECTED_TAGS = "selected_tags"
+ WRITEOUT_COMP_SELECTED_TAGS_KEY = "key"
+ WRITEOUT_COMP_SELECTED_TAGS_HIGHESTISBEST = "highest_is_best"
+
+ WRITEOUT_GENERIC = "generic"
+ WRITEOUT_GENERIC_KEY = "key"
+
+ WRITEOUT_DESTINATION = "destination"
+ WRITEOUT_DESTINATION_RESOURCE = "resource"
+ WRITEOUT_DESTINATION_TYPE = "type"
+ WRITEOUT_DESTINATION_TYPE_FILE = "file"
+ WRITEOUT_DESTINATION_TYPE_REINVENT = "reinvent"
+ WRITEOUT_DESTINATION_TYPE_STDOUT = "stdout"
+ WRITEOUT_DESTINATION_TYPE_STDERR = "stderr"
+ WRITEOUT_DESTINATION_TYPE_REST = "rest"
+ WRITEOUT_DESTINATION_FORMAT = "format"
+ WRITEOUT_DESTINATION_MERGE = "merge"
+ WRITEOUT_DESTINATION_AUTOMATIC = "automatic"
+ WRITEOUT_DESTINATION_BASE_NAME = "base_name"
+ WRITEOUT_DESTINATION_DIR = "dir"
+ WRITEOUT_DESTINATION_MODE = "mode"
+
+ TOKEN_GUARD = "token_guard"
+
+ # try to find the internal value and return
+ # def __getattr__(self, name):
+ # if name in self:
+ # return name
+ # raise AttributeError
+
+ # prohibit any attempt to set any values
+ # def __setattr__(self, key, value):
+ # raise ValueError("No changes allowed.")
+
+
+class IteratorEnum(str, Enum):
+ N_ITERS = "n_iters"
+ ALL = "all"
+ SINGLE = "single"
+
+
+class StepEmbeddingEnum:
+ METHOD = "method"
+ METHOD_RDKIT = "RDKIT"
+
+ EMBED_AS = "embed_as"
+ EMBED_AS_ENUMERATIONS = "enumerations"
+ EMBED_AS_CONFORMERS = "conformers"
+
+ RDKIT_PROTONATE = "protonate"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepClusteringEnum:
+ N_CLUSTERS = "n_clusters"
+ MAX_ITER = "max_iter"
+ TOP_N_PER_SOLVENT = "top_n_per_solvent"
+ FEATURES = "features"
+ FREE_ENERGY_SOLVENT_TAGS = "free_energy_solvent_tags"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepRMSFilterEnum:
+ THRESHOLD = "threshold" # RMS threshold in Angstrom
+
+ # order by this tag in picking the conformers
+ ORDER_BY = "order_by"
+ ORDER_ASCENDING = "ascending"
+
+ METHOD = "method" # calculation method
+ METHOD_BEST = "best" # RDKit's "GetBestRMS"
+ METHOD_ALIGNMOL = "alignmol" # RDKit's "AlignMol"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepRMSDEnum:
+
+ METHOD = "method" # calculation method
+ METHOD_BEST = "best" # RDKit's "GetBestRMS"
+ METHOD_ALIGNMOL = "alignmol" # RDKit's "AlignMol"
+
+ RMSD_TAG = "rmsd"
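+
+ # (clarifying note, not part of the original source) METHOD_BEST corresponds to
+ # rdkit.Chem.rdMolAlign.GetBestRMS (symmetry-aware, slower), METHOD_ALIGNMOL to
+ # rdkit.Chem.rdMolAlign.AlignMol (uses the given atom order, faster)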
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepFeatureCounterEnum:
+
+ FEATURE = "feature"
+ LEVEL = "level"
+ LEVEL_ENUMERATION = "enumeration"
+ LEVEL_CONFORMER = "conformer"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepDataManipulationEnum:
+
+ # specify actions that can be used
+ ACTION = "action"
+ ACTION_NO_ACTION = (
+ # used to skip any calculation (e.g. to just pool input data)
+ "no_action"
+ )
+ CONVERT_MAE_TO_PDB = "mae2pdb"
+ ASSEMBLE_COMPLEXES = "assemble_complexes"
+ ACTION_ATTACH_CONFORMERS_AS_EXTRA = "attach_conformers_as_extra"
+ COLLECT_ITERATOR_RESULTS = "collect_iterator_results"
+ FILTER = "filter"
+
+ # --> For ACTION_ATTACH_CONFORMERS_AS_EXTRA
+ # --- Match everything with the same <compound>:<enumeration>:<conformer> string
+ MATCH_SOURCE = (
+ "source" # step from which the conformers are to be used for matching
+ )
+ KEY_MATCHED = "matched" # extra-data key for matched data
+ RECEPTOR = "receptor"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepBoltzmannWeightingEnum:
+ PROPERTIES = "properties"
+ PROPERTIES_INPUT = "input"
+ PROPERTIES_OUTPUT = "output"
+
+ WEIGHT = "weight"
+ WEIGHT_INPUT = "input"
+ WEIGHT_OUTPUT_PREFIX = "output_prefix"
+ WEIGHT_PROPERTIES = "properties"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepPredictorEnum:
+ MODEL_PATH = "model_path"
+ FEATURES = "features"
+ NAME_PREDICTED = "name_predicted"
+ NAME_PREDICTED_DEFAULT = "pred_value"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepModelBuilderEnum:
+ # configuration fields
+ DATA = "data"
+ DATA_INPUT_COLUMN = "input_column"
+ DATA_RESPONSE_COLUMN = "response_column"
+ DATA_TRAININGSET_FILE = "training_dataset_file"
+ DATA_TESTSET_FILE = "test_dataset_file"
+
+ # fixed tempfile names
+ TMP_INPUT_CONFIG = "input_config.json"
+ TMP_INPUT_DATA = "input_data.csv"
+ TMP_OUTPUT_BEST_MODEL = "best_model.pkl"
+ TMP_OUTPUT_BEST_PARAMETERS = "best_parameters.json"
+ TMP_OUTPUT_PRODUCTION_MODEL = "production_model.pkl"
+
+ # fields
+ FIELD_KEY_PRODUCTION_MODEL = "production_model"
+ FIELD_KEY_BEST_CONFIGURATION = "best_configuration"
+ FIELD_KEY_INPUT_DATA = "input_data"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class TokenGuardEnum:
+ TG = "token_guard"
+ TG_PREFIX_EXECUTION = "prefix_execution"
+ TG_BINARY_LOCATION = "binary_location"
+ TG_TOKEN_POOLS = "token_pools"
+ TG_WAIT_INTERVAL_SECONDS =
"wait_interval_seconds" + TG_WAIT_LIMIT_SECONDS = "wait_limit_seconds" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepPrepwizEnum: + + REMOVE_RES = "remove_res" + LIGANDS = "ligands" + COFACTOR_IDS = [ + "TDP", + "FAD", + "FMN", + "NAD", + "PNS", + "COA", + "PLP", + "GSH", + "BTN", + "FFO", + "B12", + "ASC", + "MQ7", + "UQ1", + "MGD", + "H4B", + "MDO", + "SAM", + "F43", + "COM", + "TP7", + "HEA", + "DPM", + "PQQ", + "TPQ", + "TRQ", + "LPA", + "HEM", + ] + + +class StepLigprepEnum: + FILTER_FILE = "filter_file" + + # the SDF tag with -# (where # is the number of the enumeration starting with '1') + LIGPREP_VARIANTS = "s_lp_Variant" + # number from 0 to 1 (sums up to 1 over all variants) + LIGPREP_TAUTOMER_PROBABILITY = "r_lp_tautomer_probability" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepGlideEnum: + # input specification parameters + CONFIGURATION = "configuration" + TIME_LIMIT_PER_TASK = "time_limit_per_task" + MAESTRO_IN_FILE = "maestro_in_file" + MAESTRO_IN_FILE_PATH = "path" + + # glide: fixed strings + # the docking score (including "Epik" corrections") + GLIDE_DOCKING_SCORE = "r_i_docking_score" + # the "docking score" without "Epik" corrections + GLIDE_GSCORE = "r_i_glide_gscore" + # the index of the ligand in the input file (starting with '1') + GLIDE_SOURCE_FILE_INDEX = "i_m_source_file_index" + + GLIDE_POSEVIEWER_FILE_KEY = "structures_pv.maegz" + GLIDE_MAEGZ_DEFAULT_EXTENSION = "_pv.maegz" + GLIDE_SDF_DEFAULT_EXTENSION = "_lib.sdfgz" + GLIDE_LOG = ".log" + GLIDE_SDF = ".sdf" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepMacromodelEnum: + # COM file + COM_FILE = "com_file" + COM_FILE_PATH = "com_file.com" + COM_FILE_DEFAULT = """ MMOD 0 1 0 0 0.0000 0.0000 0.0000 0.0000 + DEBG 55 0 0 0 0.0000 0.0000 0.0000 0.0000 + FFLD 16 1 0 0 1.0000 0.0000 0.0000 0.0000 + SOLV 3 1 0 0 0.0000 0.0000 0.0000 0.0000 + EXNB 0 0 0 0 0.0000 0.0000 0.0000 0.0000 + BDCO 0 0 0 0 89.4427 99999.0000 0.0000 0.0000 + READ 0 0 0 0 0.0000 0.0000 0.0000 0.0000 + CRMS 0 0 0 0 0.0000 0.8000 0.0000 0.0000 + LMCS 1000 0 0 0 0.0000 0.0000 3.0000 6.0000 + NANT 0 0 0 0 0.0000 0.0000 0.0000 0.0000 + MCNV 1 5 0 0 0.0000 0.0000 0.0000 0.0000 + MCSS 2 0 0 0 27.0000 0.0000 0.0000 0.0000 + MCOP 1 0 0 0 0.5000 0.0000 0.0000 0.0000 + DEMX 0 833 0 0 27.0000 54.0000 0.0000 0.0000 + MSYM 0 0 0 0 0.0000 0.0000 0.0000 0.0000 + AUOP 0 0 0 0 100.0000 0.0000 0.0000 0.0000 + AUTO 0 2 1 1 0.0000 1.0000 0.0000 2.0000 + CONV 2 0 0 0 0.0500 0.0000 0.0000 0.0000 + MINI 1 0 2500 0 0.0000 0.0000 0.0000 0.0000""" + + # fixed file names + MAE_INPUT = "input_mol.mae" + MAE_OUTPUT = "output_mol.mae" + SDF_OUTPUT = "output_mol.sdf" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No 
changes allowed.") + + +class StepPrimeEnum: + + RECEPTOR = "receptor" # path to the receptor MAE file + POSEVIEWER = "poseviewer" + + # fixed file names + SDF_INPUT = "input_mol.sdf" + MAE_INPUT = "input_mol.mae" + MAE_COMPLEX = "complex.mae" + MAE_OUTPUT = "complex-out.maegz" + MMGBSA_SCORE = "r_psp_MMGBSA_dG_Bind" + SDF_OUTPUT = "output_mol.sdf" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepTurbomoleEnum: + EXECUTION_MODE = "execution_mode" + SUCCESS = "success" + FAILED = "failed" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepPantherEnum: + # settings.additional + PANTHER_LOCATION = "panther_location" + PANTHER_CONFIG_FILE = "panther_config_file" + OUTPUT_FILE = "output_file" + PANTHER_CONFIG_DIR = "panther_config_file" + FIELDS = "fields" + + # fields + FIELD_KEY_NEGATIVE_IMAGE = "negative_image" + FIELD_KEY_COORDINATES = "5-Center" + FIELD_KEY_PDB_FILE = "1-Pdb file" + + # parameters + FIELDS_PARAMETERS_LIB = { + "2-Radius": "rad.lib", + "3-Angle": "angles.lib", + "4-Charge": "charges.lib", + } + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepShaepEnum: + # field keys for storing data + FIELD_KEY_NEGATIVE_IMAGE = "negative_image" + NEG_IMAGE_EXT = "mol2" + + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepGromacsEnum: + FIELDS = "fields" + COFACTORS = "cofactors" + FORCEFIELD = "forcefield" + INPUT_FILE = "input_file" + + FIELD_KEY_STRUCTURE = "gro" + FIELD_KEY_PDB = "pdb" + FIELD_KEY_TOPOL = "top" + FIELD_KEY_TPR = "tpr" + FIELD_KEY_MDP = "mdp" + FIELD_KEY_XTC = "xtc" + FIELD_KEY_ITP = "itp" + FIELD_KEY_LOG = "log" + FIELD_KEY_EDR = "edr" + FIELD_KEY_NDX = "ndx" + FILE_SIZE_THRESHOLD = 2000000000 + + MAKE_NDX_COMMAND = "make_ndx_command" + INDEX_FLAG = "-n" + + # magic strings associated with ligand parametrisation step + FORCEFIELD_ITP = "forcefield.itp" + LIGAND_ITP = "Ligand.itp" + INCLUDE_LIG_ITP = '#include "Ligand.itp"' + LIG_MOLECULE_GRP = "Ligand 1\n" + COMPLEX_TOP = "Complex.top" + COMPLEX_PDB = "Complex.pdb" + PROTEIN_PDB = "Protein.pdb" + PROTEIN_TOP = "Protein.top" + LIGAND_PDB = "Ligand.pdb" + LIGAND_MOL2 = "Ligand.mol2" + STD_INDEX = "index.ndx" + STD_TOPOL = "topol.top" + STD_TPR = "structure.tpr" + STD_XTC = "structure.xtc" + STD_STRUCTURE = "structure.gro" + POSRE_LIG = "posre_lig.itp" + FORCE_CONSTANTS = "1000 1000 1000" + LIG_ID = "lig_id" + COUPLING_GROUP = "Other" + MMPBSA_IN = "mmpbsa.in" + GROMACS_LOAD = "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + AMBERTOOLS_LOAD = "module load AmberTools/21-fosscuda-2019a-Python-3.7.2" + WATER_AND_IONS = "Water_and_ions" + PROTEIN_OTHER = "Protein_Other" + SIM_COMPLETE = "Finished mdrun" + AUTO = "auto" + TC_GRPS = "tc-grps" + CLUSTERS_NUMBER = 
"clustersNumber" + LENGTHS = "lengths" + COUPLING_GROUPS = "coupling_groups" + + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepCavExploreEnum: + FIELD_KEY_DTR = "dtr" + FIELD_KEY_CMS = "cms" + FIELD_KEY_DX = "dx" + + # settings.additional + CAVITY_LOCATION = "cavity_location" + CAVITY_CONFIG_FILE = "cavity_config_file" + OUTPUT_FILE = "output_file" + CAVITY_CONFIG_DIR = "cavity_config_dir" + FIELDS = "fields" + SELECTION_TEXT = "selection_text" + PROTEIN = "protein" + NAME_CA = "name CA" + FRAME_LIST_FILE = "list_of_frames.txt" + MDPOCKET_COMMAND = "mdpocket" + MPI_THREADS = "mpi_threads" + EPS = "eps" + MIN_SAMPLES = "min_samples" + ISO_VALUE = "iso_value" + TRAJ_TYPE = "format" + THRESHOLD = "threshold" + + # add own fixed strings and import in step + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class StepFepPlusEnum: + FIELD_KEY_POSEVIEWER = "poseviewer" + RECEPTOR_MAEGZ = "receptor.maegz" + STRUCT_SPLIT_BASE = "split" + STRUCTCAT_MAEGZ_OUTFILE = "concatenated.mae" + STRUCTCAT_SDF_OUTFILE = "concatenated.sdf" + FEP_MAPPER_OUTPUT = "out" + FMP_OUTPUT_FILE = "out.fmp" + LOGFILE = "multisim.log" + EDGE_HEADER_LINE = "* Edge calculated properties (units in kcal/mol)" + NODE_HEADER_LINE = "* Node calculated properties (units in kcal/mol)" + SIMULATION_PROTOCOL = "* Simulation Protocol" + SIMILARITY = "* Similarity" + DATA_TERMINUS = "fep_mapper_cleanup: Loading output graph" + HOST_FLAG = "-HOST" + WAIT_FLAG = "-WAIT" + JOBNAME_FLAG = "-JOBNAME" + REFERENCE_DG = "ref_dg" + JOBID_STRING = "JobId:" + XRAY_STRUCTURES = "xray_structures" + XRAY_SPLIT = "xray_split" + RETRIES = "-RETRIES" + + FILE_NAME = "--name" + FEP_EXEC_COMPLETE = "Multisim completed." + FEP_EXEC_PARTIAL_COMPLETE = "Multisim partially completed." 
+
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepFilterEnum:
+ FILTER_LEVEL = "filter_level"
+ CONFORMERS = "conformers"
+ COMPOUNDS = "compounds"
+ HIGHEST_IS_BEST = "highest_is_best"
+ ENUMERATIONS = "enumerations"
+ CRITERIA = "criteria"
+ AGGREGATION = "aggregation"
+ RETURN_N = "return_n"
+
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepDesmondEnum:
+
+ PREPROCESS_MSJ = "config.msj"
+ PRODUCTION_MSJ = "production.msj"
+ PRODUCTION_CFG = "prod.cfg"
+ MSJ_FIELDS = "msj_fields"
+ CFG_FIELDS = "cfg_fields"
+ SETUP_MSJ_FIELDS = "setup_msj_fields"
+ CONFIG = "config"
+ TOKEN_STR = "DESMOND_GPGPU:16"
+
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepPdbFixerEnum:
+ ADD_MISSING_HYDROGENS = "add_missing_hydrogens"
+ ADD_MISSING_ATOMS = "add_missing_atoms"
+ FIND_MISSING_ATOMS = "find_missing_atoms"
+ FIND_MISSING_RESIDUES = "find_missing_residues"
+ REPLACE_NONSTANDARD_RESIDUES = "replace_nonstandard_residues"
+ REMOVE_CHAINS = "remove_chains"
+
+
+class StepDSSPEnum:
+ pass
+
+
+class StepCressetEnum:
+ SUCCESS = "success"
+
+
+class StepAutoDockVinaEnum:
+
+ ADV_RECEPTOR_PATH = "receptor_path"
+ ADV_SEED = "seed"
+ ADV_SEARCH_SPACE = "search_space"
+ ADV_SEARCH_SPACE_CENTER_X = "--center_x"
+ ADV_SEARCH_SPACE_CENTER_Y = "--center_y"
+ ADV_SEARCH_SPACE_CENTER_Z = "--center_z"
+ ADV_SEARCH_SPACE_SIZE_X = "--size_x"
+ ADV_SEARCH_SPACE_SIZE_Y = "--size_y"
+ ADV_SEARCH_SPACE_SIZE_Z = "--size_z"
+
+ CONFIGURATION = "configuration"
+ NUMBER_POSES = "number_poses"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepAutoDockVinaTargetPreparationEnum:
+
+ ADV_PDBQT = ".pdbqt"
+ INPUT_RECEPTOR_PDB = "input_receptor_pdb"
+ OUTPUT_RECEPTOR_PDBQT = "output_receptor_pdbqt"
+ PH = "pH"
+ EXTRACT_BOX = "extract_box"
+ EXTRACT_BOX_REFERENCE_LIGAND_PATH = "reference_ligand_path"
+ EXTRACT_BOX_REFERENCE_LIGAND_FORMAT = "reference_ligand_format"
+ EXTRACT_BOX_REFERENCE_LIGAND_FORMAT_PDB = "PDB"
+ EXTRACT_BOX_REFERENCE_LIGAND_FORMAT_SDF = "SDF"
+
+ # try to find the internal value and return
+ def __getattr__(self, name):
+ if name in self:
+ return name
+ raise AttributeError
+
+ # prohibit any attempt to set any values
+ def __setattr__(self, key, value):
+ raise ValueError("No changes allowed.")
+
+
+class StepActiveLearningEnum:
+
+ ORACLE_CONFIG = "oracle_config"
+ SMILES = "SMILES"
+ MOLECULE = "Molecule"
+ VIRTUAL_LIB = "virtual_lib"
+ INIT_SAMPLES = "init_samples"
+ MORGAN_FP = "MorganFP"
+ N_ROUNDS = "n_rounds"
+ BATCH_SIZE = "batch_size"
+ CRITERIA = "criteria"
+ VALIDATION_LIB = "validation_lib"
diff --git a/icolos/utils/enums/step_initialization_enum.py b/icolos/utils/enums/step_initialization_enum.py
new file mode 100644
index 0000000..af53e14
--- /dev/null
+++ b/icolos/utils/enums/step_initialization_enum.py
@@ -0,0 +1,110 @@
+from
icolos.core.job_control.job_control import StepJobControl +from icolos.core.workflow_steps.autodockvina.docking import StepAutoDockVina +from icolos.core.workflow_steps.autodockvina.target_preparation import ( + StepAutoDockVinaTargetPreparation, +) +from icolos.core.workflow_steps.calculation.electrostatics.esp_sim import StepEspSim +from icolos.core.workflow_steps.calculation.feature_counter import StepFeatureCounter +from icolos.core.workflow_steps.gromacs.do_dssp import StepGMXDoDSSP +from icolos.core.workflow_steps.gromacs.mmpbsa import StepGMXmmpbsa +from icolos.core.workflow_steps.cavity_explorer.mdpocket import StepMDpocket +from icolos.core.workflow_steps.gromacs.trajcat import StepGMXTrjcat +from icolos.core.workflow_steps.io.data_manipulation import StepDataManipulation +from icolos.core.workflow_steps.schrodinger.fep_analysis import StepFepPlusAnalysis +from icolos.core.workflow_steps.structure_prediction.pdb_fixer import StepPdbFixer +from icolos.core.workflow_steps.gromacs import * +from icolos.core.workflow_steps.calculation.boltzmann_weighting import ( + StepBoltzmannWeighting, +) +from icolos.core.workflow_steps.calculation.rmsd import StepRMSD +from icolos.core.workflow_steps.schrodinger import * +from icolos.core.workflow_steps.calculation.cosmo import StepCosmo +from icolos.core.workflow_steps.calculation.turbomole import StepTurbomole +from icolos.core.workflow_steps.confgen.crest import StepCREST +from icolos.core.workflow_steps.pmx import * +from icolos.core.workflow_steps.confgen.omega import StepOmega +from icolos.core.workflow_steps.confgen.xtb import StepXTB +from icolos.core.workflow_steps.io.embedder import StepEmbedding +from icolos.core.workflow_steps.io.initialize_compound import StepInitializeCompound +from icolos.core.workflow_steps.prediction.predictor import StepPredictor +from icolos.core.workflow_steps.prediction.model_building import StepModelBuilder +from icolos.core.workflow_steps.calculation.clustering import StepClustering +from icolos.core.workflow_steps.calculation.rms_filter import StepRMSFilter +from icolos.core.workflow_steps.calculation.panther import StepPanther +from icolos.core.workflow_steps.calculation.shaep import StepShaep +from icolos.core.workflow_steps.structure_prediction.peptide_embedder import ( + StepPeptideEmbedder, +) +from icolos.core.workflow_steps.structure_prediction.dssp import StepDSSP +from icolos.utils.enums.step_enums import StepBaseEnum + + +_SBE = StepBaseEnum + + +class StepInitializationEnum: + + STEP_INIT_DICT = { + _SBE.STEP_CREST: StepCREST, + _SBE.STEP_OMEGA: StepOmega, + _SBE.STEP_XTB: StepXTB, + _SBE.STEP_MACROMODEL: StepMacromodel, + _SBE.STEP_TURBOMOLE: StepTurbomole, + _SBE.STEP_COSMO: StepCosmo, + _SBE.STEP_INITIALIZATION: StepInitializeCompound, + _SBE.STEP_EMBEDDING: StepEmbedding, + _SBE.STEP_PREDICTION: StepPredictor, + _SBE.STEP_MODEL_BUILDING: StepModelBuilder, + _SBE.STEP_BOLTZMANN_WEIGHTING: StepBoltzmannWeighting, + _SBE.STEP_PRIME: StepPrime, + _SBE.STEP_DESMOND: StepDesmondExec, + _SBE.STEP_DESMOND_SETUP: StepDesmondSetup, + _SBE.STEP_CLUSTERING: StepClustering, + _SBE.STEP_RMSFILTER: StepRMSFilter, + _SBE.STEP_PANTHER: StepPanther, + _SBE.STEP_SHAEP: StepShaep, + _SBE.STEP_PDB2GMX: StepGMXPdb2gmx, + _SBE.STEP_EDITCONF: StepGMXEditConf, + _SBE.STEP_SOLVATE: StepGMXSolvate, + _SBE.STEP_GENION: StepGMXGenion, + _SBE.STEP_GROMPP: StepGMXGrompp, + _SBE.STEP_MDRUN: StepGMXMDrun, + _SBE.STEP_TRJCONV: StepGMXTrjconv, + _SBE.STEP_TRJCAT: StepGMXTrjcat, + _SBE.STEP_CLUSTER: StepGMXCluster, + 
_SBE.STEP_DO_DSSP: StepGMXDoDSSP, + _SBE.STEP_GMX_RMSD: StepGMXrmsd, + _SBE.STEP_LIGPREP: StepLigprep, + _SBE.STEP_GLIDE: StepGlide, + _SBE.STEP_FEP_PLUS_SETUP: StepFepPlusSetup, + _SBE.STEP_FEP_PLUS_EXEC: StepFepPlusExec, + _SBE.STEP_FEP_PLUS_ANALYSIS: StepFepPlusAnalysis, + _SBE.STEP_PREPWIZARD: StepPrepwizard, + _SBE.STEP_MDPOCKET: StepMDpocket, + _SBE.STEP_PEPTIDE_EMBEDDER: StepPeptideEmbedder, + _SBE.STEP_PDB_FIXER: StepPdbFixer, + _SBE.STEP_GMX_MMPBSA: StepGMXmmpbsa, + _SBE.STEP_TS_CLUSTER: StepClusterTS, + _SBE.STEP_DSSP: StepDSSP, + _SBE.STEP_RMSD: StepRMSD, + _SBE.STEP_DATA_MANIPULATION: StepDataManipulation, + _SBE.STEP_PMX_ASSEMBLE_SYSTEMS: StepPMXAssembleSystems, + _SBE.STEP_PMX_ATOMMAPPING: StepPMXatomMapping, + _SBE.STEP_PMX_ABFE: StepPMXabfe, + _SBE.STEP_PMX_DOUBLEBOX: StepPMXdoublebox, + _SBE.STEP_PMX_LIGANDHYBRID: StepPMXligandHybrid, + _SBE.STEP_PMX_BOX_WATER_IONS: StepPMXBoxWaterIons, + _SBE.STEP_PMX_SETUP: StepPMXSetup, + _SBE.STEP_PMX_PREPARE_TRANSITIONS: StepPMXPrepareTransitions, + _SBE.STEP_PMX_PREPARE_SIMULATIONS: StepPMXPrepareSimulations, + _SBE.STEP_PMX_RUN_ANALYSIS: StepPMXRunAnalysis, + _SBE.STEP_PMX_MUTATE: StepPMXmutate, + _SBE.STEP_PMX_GENTOP: StepPMXgentop, + _SBE.STEP_PMX_GENLIB: StepPMXgenlib, + _SBE.STEP_FEATURE_COUNTER: StepFeatureCounter, + _SBE.STEP_AUTODOCKVINA_DOCKING: StepAutoDockVina, + _SBE.STEP_AUTODOCKVINA_TARGET_PREPARATION: StepAutoDockVinaTargetPreparation, + _SBE.STEP_PMX_RUN_SIMULATIONS: StepPMXRunSimulations, + _SBE.STEP_JOB_CONTROL: StepJobControl, + _SBE.STEP_ESP_SIM: StepEspSim, + } diff --git a/icolos/utils/enums/write_out_enums.py b/icolos/utils/enums/write_out_enums.py new file mode 100644 index 0000000..30bd91f --- /dev/null +++ b/icolos/utils/enums/write_out_enums.py @@ -0,0 +1,47 @@ +class WriteOutEnum: + + RDKIT_NAME = "_Name" + INDEX_STRING = "index_string" + COMPOUND_NAME = "compound_name" + + # REINVENT-compatible JSON write-out + JSON_RESULTS = "results" + JSON_NAMES = "names" + JSON_NA = "" + JSON_VALUES = "values" + JSON_VALUES_KEY = "values_key" + + SDF = "sdf" + PDB = "pdb" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class RunVariablesEnum: + + PREFIX = "[" + POSTFIX = "]" + COMPOUND_ID = "compound_id" + ENUMERATION_ID = "enumeration_id" + CONFORMER_ID = "conformer_id" + COMPOUND_NAME = "compound_name" + ENUMERATION_STRING = "enumeration_string" + CONFORMER_STRING = "conformer_string" + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") diff --git a/icolos/utils/execute_external/__init__.py b/icolos/utils/execute_external/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/utils/execute_external/autodockvina.py b/icolos/utils/execute_external/autodockvina.py new file mode 100644 index 0000000..0c2c9f8 --- /dev/null +++ b/icolos/utils/execute_external/autodockvina.py @@ -0,0 +1,41 @@ +from icolos.utils.enums.program_parameters import AutoDockVinaEnum +from icolos.utils.execute_external.execute import ExecutorBase + +_EE = AutoDockVinaEnum() + + +class AutoDockVinaExecutor(ExecutorBase): + """For the execution of AutoDock Vina 1.2.0.""" + + def __init__(self, 
prefix_execution=None, binary_location=None):
+ super().__init__(
+ prefix_execution=prefix_execution, binary_location=binary_location
+ )
+
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+ # check whether a proper executable is provided
+ if command not in [_EE.VINA_CALL]:
+ raise ValueError(
+ "Parameter command must be in the internal AutoDock Vina executable list."
+ )
+
+ return super().execute(
+ command=command,
+ arguments=arguments,
+ check=check,
+ location=location,
+ pipe_input=pipe_input,
+ )
+
+ def is_available(self):
+ try:
+ result = self.execute(
+ command=_EE.VINA_CALL, arguments=[_EE.VINA_HELP], check=True
+ )
+ if result.returncode == 0:
+ return True
+ return False
+ except Exception:
+ return False
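+
+ # (illustrative usage sketch, not part of the original file; the module name and
+ # file paths are assumptions)
+ #   executor = AutoDockVinaExecutor(prefix_execution="module load AutoDock_Vina")
+ #   if executor.is_available():
+ #       executor.execute(command=_EE.VINA_CALL,
+ #                        arguments=["--receptor", "receptor.pdbqt", "--ligand", "ligand.pdbqt"],
+ #                        location="/tmp")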
diff --git a/icolos/utils/execute_external/batch_executor.py b/icolos/utils/execute_external/batch_executor.py
new file mode 100644
index 0000000..cee1d0a
--- /dev/null
+++ b/icolos/utils/execute_external/batch_executor.py
@@ -0,0 +1,145 @@
+import os
+from shlex import quote
+from icolos.utils.execute_external.execute import ExecutorBase
+from icolos.utils.enums.program_parameters import SlurmEnum
+import subprocess
+from typing import List
+import time
+from tempfile import mkstemp
+
+_SE = SlurmEnum()
+
+
+class BatchExecutor(ExecutorBase):
+ """For execution of batch jobs using either the Slurm or the SGE scheduler."""
+
+ def __init__(
+ self,
+ cores: int,
+ partition: str,
+ time: str,
+ mem: str,
+ modules: List,
+ other_args: dict,
+ gres: str,
+ prefix_execution=None,
+ binary_location=None,
+ ):
+ super().__init__(
+ prefix_execution=prefix_execution, binary_location=binary_location
+ )
+
+ self.cores = cores
+ self.partition = partition
+ self.time = time
+ self.mem = mem
+ self.modules = modules
+ self.other_args = other_args
+ self.gres = gres
+
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+
+ batch_script = self._construct_slurm_header()
+ command = self._prepare_command(command, arguments, pipe_input)
+ batch_script.append(command)
+ _, tmpfile = mkstemp(dir=location, suffix=".sh")
+ with open(tmpfile, "w") as f:
+ for line in batch_script:
+ f.write(line)
+ f.write("\n")
+
+ sbatch_command = f"sbatch {tmpfile}"
+ # execute the batch script
+ result = super().execute(
+ command=sbatch_command, arguments=[], location=location
+ )
+ job_id = result.stdout.split()[-1]
+ state = self._wait_for_job_completion(job_id=job_id)
+
+ # check the final job state reported by the scheduler
+ if check:
+ if state != _SE.COMPLETED:
+ raise subprocess.SubprocessError(
+ f"Batch job did not complete successfully:\n{sbatch_command}\nState:\n{state}"
+ )
+ return state
+
+ def is_available(self):
+ raise NotImplementedError(
+ "Cannot reliably check whether an arbitrary program executes properly - do not use."
+ )
+
+ def _prepare_command(
+ self, command: str, arguments: List, pipe_input: str = None
+ ) -> str:
+ arguments = [quote(str(arg)) for arg in arguments]
+
+ # allow for piped input to be passed to binaries
+ if pipe_input is not None:
+ # pipe_input = self._parse_pipe_input(pipe_input)
+ command = pipe_input + " | " + command
+
+ # check if the command (binary) is to be found at a specific location (rather than in $PATH)
+ if self._binary_location is not None:
+ command = os.path.join(self._binary_location, command)
+
+ # check if something (e.g. loading a module) needs to be executed before the command itself
+ if self._prefix_execution is not None:
+ command = self._prefix_execution + " && " + command
+
+ # assemble the final command line
+ complete_command = command + " " + " ".join(str(e) for e in arguments)
+ return complete_command.replace("'", "")
+
+ def _wait_for_job_completion(self, job_id):
+ completed = False
+ state = None
+ while not completed:
+ state = self._check_job_status(job_id)
+ if state in [_SE.PENDING, _SE.RUNNING]:
+ time.sleep(5)
+ continue
+ elif state == _SE.COMPLETED:
+ completed = True
+ elif state == _SE.FAILED:
+ completed = True
+
+ return state
+
+ def _check_job_status(self, job_id):
+ """
+ Monitor the status of a previously submitted job and return the result.
+ """
+ command = f"module load slurmtools && jobinfo {job_id}"
+ result = subprocess.run(
+ command,
+ shell=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ universal_newlines=True,
+ )
+
+ state = None
+ for line in result.stdout.split("\n"):
+ if _SE.STATE in line:
+ state = line.split(":")[-1].split()[0]
+ return state
+
+ def _construct_slurm_header(self):
+ header = [
+ "#!/bin/bash",
+ f"#SBATCH -c{self.cores}",
+ f"#SBATCH -p {self.partition}",
+ f"#SBATCH --time={self.time}",
+ f"#SBATCH --mem={self.mem}",
+ ]
+ header.append(f"#SBATCH --gres={self.gres}")
+ for key, value in self.other_args.items():
+ header.append(f"#SBATCH {key}={value}")
+
+ for module in self.modules:
+ header.append(f"module load {module}")
+
+ return header
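+
+ # (illustrative rendering, assumed values; not part of the original file) for
+ # cores=4, partition="core", time="12:00:00", mem="16g", gres="gpu:1" and
+ # modules=["GROMACS/2021"], the submission script assembled above looks roughly like:
+ #   #!/bin/bash
+ #   #SBATCH -c4
+ #   #SBATCH -p core
+ #   #SBATCH --time=12:00:00
+ #   #SBATCH --mem=16g
+ #   #SBATCH --gres=gpu:1
+ #   module load GROMACS/2021
+ #   <command line assembled by _prepare_command>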
diff --git a/icolos/utils/execute_external/cresset_executor.py b/icolos/utils/execute_external/cresset_executor.py
new file mode 100644
index 0000000..533a6f0
--- /dev/null
+++ b/icolos/utils/execute_external/cresset_executor.py
@@ -0,0 +1,40 @@
+from icolos.utils.execute_external.execute import ExecutorBase
+
+
+class CressetExecutor(ExecutorBase):
+ """For the execution of Cresset binaries."""
+
+ def __init__(self, prefix_execution=None, binary_location=None):
+ super().__init__(
+ prefix_execution=prefix_execution, binary_location=binary_location
+ )
+
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+ # check whether a proper executable is provided
+ # if command not in [EE.OMEGA]:
+ # raise ValueError(
+ # "Parameter command must be an dictionary of the internal Omega executable list."
+ # )
+
+ return super().execute(
+ command=command,
+ arguments=arguments,
+ check=check,
+ location=location,
+ pipe_input=pipe_input,
+ )
+
+ def is_available(self):
+ # try:
+ # result = self.execute(
+ # command=EE.OMEGA, arguments=[EE.OMEGA_HELP], check=True
+ # )
+
+ # if EE.OMEGA_HELP_IDENTIFICATION_STRING in result.stderr:
+ # return True
+ # return False
+ # except Exception as e:
+ # return False
+ pass
diff --git a/icolos/utils/execute_external/crest.py b/icolos/utils/execute_external/crest.py
new file mode 100644
index 0000000..bf0901a
--- /dev/null
+++ b/icolos/utils/execute_external/crest.py
@@ -0,0 +1,41 @@
+from icolos.utils.execute_external.execute import ExecutorBase
+from icolos.utils.enums.program_parameters import CrestEnum
+
+
+EE = CrestEnum()
+
+
+class CrestExecutor(ExecutorBase):
+ """For the execution of the "crest" binary."""
+
+ def __init__(self, prefix_execution=None, binary_location=None):
+ super().__init__(
+ prefix_execution=prefix_execution, binary_location=binary_location
+ )
+
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+ # check whether a proper executable is provided
+ if command not in [EE.CREST]:
+ raise ValueError(
+ "Parameter command must be in the internal Crest executable list."
+ )
+
+ return super().execute(
+ command=command,
+ arguments=arguments,
+ check=check,
+ location=location,
+ pipe_input=pipe_input,
+ )
+
+ def is_available(self):
+ try:
+ result = self.execute(command=EE.CREST, arguments=[EE.CREST_H], check=True)
+
+ if EE.CREST_HELP_IDENTIFICATION_STRING in result.stdout:
+ return True
+ return False
+ except Exception:
+ return False
diff --git a/icolos/utils/execute_external/execute.py b/icolos/utils/execute_external/execute.py
new file mode 100644
index 0000000..6398f38
--- /dev/null
+++ b/icolos/utils/execute_external/execute.py
@@ -0,0 +1,101 @@
+import os
+import abc
+import subprocess
+from shlex import quote
+
+from icolos.utils.enums.execution_enums import ExecutionResourceEnum
+from icolos.utils.enums.program_parameters import SlurmEnum
+
+_ERE = ExecutionResourceEnum
+_SE = SlurmEnum()
+
+
+class ExecutorBase(metaclass=abc.ABCMeta):
+ """Virtual base class for the general and program-specific executors."""
+
+ def __init__(self, prefix_execution=None, binary_location=None):
+ # if something needs to be attached to the execution string each time, store it here; if not, value is "None"
+ self._prefix_execution = prefix_execution
+ self._binary_location = binary_location
+ # initialise from the step with self.execution.resource dict
+
+ @abc.abstractmethod
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+ # to avoid security issues, escape the arguments
+ arguments = [quote(str(arg)) for arg in arguments]
+
+ # allow for piped input to be passed to binaries
+ if pipe_input is not None:
+ # pipe_input = self._parse_pipe_input(pipe_input)
+ command = pipe_input + " | " + command
+
+ # check if the command (binary) is to be found at a specific location (rather than in $PATH)
+ if self._binary_location is not None:
+ command = os.path.join(self._binary_location, command)
+
+ # check if something (e.g. loading a module) needs to be executed before the command itself
+ if self._prefix_execution is not None:
+ command = self._prefix_execution + " && " + command
+
+ # execute; if "location" is set, change to this directory and execute there
+ complete_command = command + " " + " ".join(str(e) for e in arguments)
+ complete_command = complete_command.replace("'", "")
+ # print(complete_command)
+ old_cwd = os.getcwd()
+ if location is not None:
+ os.chdir(location)
+
+ # determine whether this is to be run using local resources or as a batch job
+ try:
+ result = subprocess.run(
+ complete_command,
+ check=False, # use the manual check below to provide better debugging information than subprocess
+ # convert output to string (instead of byte array)
+ universal_newlines=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ shell=True,
+ )
+
+ if check and result.returncode != 0:
+ raise subprocess.SubprocessError(
+ f"Subprocess returned non-zero exit status:\n{complete_command}\nReturn code:\n{result.returncode}\nSTDERR:\n{result.stderr}\nSTDOUT:\n{result.stdout}"
+ )
+ finally:
+ # restore the working directory even if the manual check raised
+ os.chdir(old_cwd)
+ return result
+
+ @abc.abstractmethod
+ def is_available(self):
+ raise NotImplementedError("Overwrite this method in the child class.")
+
+
+class Executor(ExecutorBase):
+ """For execution of command-line programs that do not have any specific executor themselves."""
+
+ def __init__(self, prefix_execution=None, binary_location=None):
+ super().__init__(
+ prefix_execution=prefix_execution,
+ binary_location=binary_location,
+ )
+
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+ return super().execute(
+ command=command,
+ arguments=arguments,
+ check=check,
+ location=location,
+ pipe_input=pipe_input,
+ )
+
+ def is_available(self):
+ raise NotImplementedError(
+ "Cannot reliably check whether an arbitrary program executes properly - do not use."
+ )
+
+
+def execution_successful(output: str, success_str: str) -> bool:
+ return success_str in output
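+
+ # (illustrative, assumed values; not part of the original file) given
+ # prefix_execution="module load xtb", binary_location="/opt/bin", command="xtb" and
+ # arguments=["coord.xyz"], execute() hands the shell the single string:
+ #   module load xtb && /opt/bin/xtb coord.xyz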
diff --git a/icolos/utils/execute_external/fep_plus.py b/icolos/utils/execute_external/fep_plus.py
new file mode 100644
index 0000000..91f5437
--- /dev/null
+++ b/icolos/utils/execute_external/fep_plus.py
@@ -0,0 +1,48 @@
+from icolos.utils.execute_external.execute import ExecutorBase
+from icolos.utils.enums.program_parameters import (
+ FepPlusEnum,
+ SchrodingerExecutablesEnum,
+)
+
+FE = FepPlusEnum()
+SEE = SchrodingerExecutablesEnum()
+
+
+class FepPlusExecutor(ExecutorBase):
+ def __init__(self, prefix_execution=None, binary_location=None):
+ super().__init__(
+ prefix_execution=prefix_execution, binary_location=binary_location
+ )
+
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+ if command not in [
+ FE.FEP_MAPPER,
+ FE.FEP_EXECUTOR,
+ FE.JSC_LIST,
+ FE.JSC_TAIL_FILE,
+ ]:
+ raise ValueError(
+ "Execution command must be recognised by the executable's enum"
+ )
+
+ return super().execute(
+ command=command,
+ arguments=arguments,
+ check=check,
+ location=location,
+ pipe_input=pipe_input,
+ )
+
+ def is_available(self):
+ try:
+ result = self.execute(
+ command=FE.FEP_MAPPER, arguments=[FE.FEP_HELP], check=True
+ )
+ if FE.FEP_MAPPER_HELP_SUCCESS_STRING in result.stdout:
+ return True
+ return False
+ except Exception as e:
+ print(str(e))
+ return False
diff --git a/icolos/utils/execute_external/glide.py b/icolos/utils/execute_external/glide.py
new file mode 100644
index 0000000..5c7f05a
--- /dev/null
+++ b/icolos/utils/execute_external/glide.py
@@ -0,0 +1,44 @@
+from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum, GlideEnum
+from icolos.utils.execute_external.execute import ExecutorBase
+
+SEE = SchrodingerExecutablesEnum()
+EE = GlideEnum()
+
+
+class GlideExecutor(ExecutorBase):
+ """For the execution of the "glide" binary."""
+
+ def __init__(self, prefix_execution=None, binary_location=None):
+ super().__init__(
+ prefix_execution=prefix_execution, binary_location=binary_location
+ )
+
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+ # check whether a proper executable is provided
+ if command not in [EE.GLIDE]:
+ raise ValueError(
+ "Parameter command must be in the internal Glide executable list."
+ )
+
+ # Note: in earlier versions, the call "glide" had to be replaced by "$SCHRODINGER/glide" here.
+ return super().execute(
+ command=command,
+ arguments=arguments,
+ check=check,
+ location=location,
+ pipe_input=pipe_input,
+ )
+
+ def is_available(self):
+ try:
+ result = self.execute(
+ command=EE.GLIDE, arguments=[EE.GLIDE_HELP], check=True
+ )
+
+ if EE.GLIDE_HELP_IDENTIFICATION_STRING in result.stdout:
+ return True
+ return False
+ except Exception:
+ return False
diff --git a/icolos/utils/execute_external/gromacs.py b/icolos/utils/execute_external/gromacs.py
new file mode 100644
index 0000000..72c1d01
--- /dev/null
+++ b/icolos/utils/execute_external/gromacs.py
@@ -0,0 +1,53 @@
+from icolos.utils.enums.program_parameters import GromacsEnum
+from icolos.utils.execute_external.execute import ExecutorBase
+
+_GE = GromacsEnum()
+
+
+class GromacsExecutor(ExecutorBase):
+ def __init__(self, prefix_execution=None, binary_location=None):
+ super().__init__(
+ prefix_execution=prefix_execution, binary_location=binary_location
+ )
+
+ def execute(
+ self, command: str, arguments: list, check=True, location=None, pipe_input=None
+ ):
+ gmx_commands = [
+ _GE.EDITCONF,
+ _GE.GENION,
+ _GE.GROMPP,
+ _GE.SOLVATE,
+ _GE.MDRUN,
+ _GE.PDB2GMX,
+ _GE.MAKE_NDX,
+ _GE.GENRESTR,
+ _GE.TRJCONV,
+ _GE.TRJCAT,
+ _GE.CLUSTER,
+ _GE.MMPBSA,
+ _GE.DO_DSSP,
+ _GE.RMS,
+ ]
+
+ # substring check: the command string may embed one of these programs (e.g. with a path prefix)
+ if not any(cmd in command for cmd in gmx_commands):
+ raise ValueError(
+ "Command must be present in internal list of GROMACS executables"
+ )
+ # pipe_input provides a handle for programs that expect interactive input
+ return super().execute(
+ command=command,
+ arguments=arguments,
+ check=check,
+ location=location,
+ pipe_input=pipe_input,
+ )
+
+ def is_available(self):
+ try:
+ result = self.execute(command=_GE.PDB2GMX, arguments=[], check=False)
+ if _GE.PDB2GMX_FAIL_ID_STRING in result.stderr:
+ return True
+ return False
+ except Exception:
+ return False
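+
+ # (illustrative usage sketch, not part of the original file; file names and the
+ # module name are assumptions)
+ #   gmx = GromacsExecutor(prefix_execution="module load GROMACS/2021")
+ #   gmx.execute(command=_GE.PDB2GMX,
+ #               arguments=["-f", "protein.pdb", "-water", "tip3p"],
+ #               pipe_input="echo 1")  # answers pdb2gmx's interactive force-field prompt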
_IFE.IFD_HELP_ID in result.stdout: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/license_token_guard.py b/icolos/utils/execute_external/license_token_guard.py new file mode 100644 index 0000000..1cdc1cf --- /dev/null +++ b/icolos/utils/execute_external/license_token_guard.py @@ -0,0 +1,127 @@ +import time +from typing import Dict + +from pydantic import BaseModel, PrivateAttr + +from icolos.utils.execute_external.execute import Executor + +from icolos.loggers.steplogger import StepLogger + +from icolos.utils.enums.logging_enums import LoggingConfigEnum +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum + +_EE = SchrodingerExecutablesEnum() +_LE = LoggingConfigEnum() + + +class TokenGuardParameters(BaseModel): + prefix_execution: str = None + binary_location: str = None + token_pools: Dict + wait_interval_seconds: int = 30 + wait_limit_seconds: int = 0 + + +class SchrodingerLicenseTokenGuard(BaseModel): + """Class that checks whether enough tokens are available to execute Schrodinger binaries.""" + + token_guard: TokenGuardParameters + + class Config: + underscore_attrs_are_private = True + + _logger = PrivateAttr() + _executor = PrivateAttr() + + def __init__(self, **data): + super().__init__(**data) + + self._logger = StepLogger() + + # initialize the executor for all "Schrodinger" related calls and also check if it is available + self._executor = Executor( + prefix_execution=self.token_guard.prefix_execution, + binary_location=self.token_guard.binary_location, + ) + + def _get_token_pool_info(self, licadmin_output: list, token_pool: str) -> dict: + result = {"found": False} + for line in licadmin_output: + if token_pool in line: + parts = line.split(" ") + # a valid token pool status line splits into 16 whitespace-separated fields: + # field 6 holds the total pool size, field 12 the number of tokens currently in use + if len(parts) == 16: + result["total"] = int(parts[6]) + result["available"] = int(parts[6]) - int(parts[12]) + result["found"] = True + break + return result + + def _check_licstat_output(self, licadmin_output: list) -> bool: + all_pools_available = True + for pool_key, pool_token_numbers in self.token_guard.token_pools.items(): + pool_status = self._get_token_pool_info(licadmin_output, pool_key) + if pool_status["found"]: + if pool_status["available"] >= pool_token_numbers: + self._logger.log( + f"Enough tokens available ({pool_status['available']}) to satisfy requirement ({pool_token_numbers} free tokens) for pool {pool_key}.", + _LE.DEBUG, + ) + else: + self._logger.log( + f"Not enough tokens available ({pool_status['available']}) to satisfy requirement ({pool_token_numbers} free tokens) for pool {pool_key}.", + _LE.DEBUG, + ) + all_pools_available = False + else: + all_pools_available = False + self._logger.log( + f"Could not find information on token pool {pool_key}.", _LE.WARNING + ) + return all_pools_available + + def _get_licstat_output(self): + # do not use check=True here: a failing call should be reported by the warning below rather than raise + result = self._executor.execute( + command=_EE.LICADMIN, arguments=[_EE.LICADMIN_STAT], check=False + ) + if result.returncode != 0: + self._logger.log( + "Could not execute the Schrodinger license token guard - do you need to export the licadmin path?", + _LE.WARNING, + ) + return result.stdout.split("\n") + + def guard(self) -> bool: + # loop over the token pools until they are all satisfied or the time limit has run out + counter = 0 + success = False + while True: + if ( + self.token_guard.wait_limit_seconds != 0 + and (counter * self.token_guard.wait_interval_seconds) + >= self.token_guard.wait_limit_seconds + ): + self._logger.log( + f"Wait period ({self.token_guard.wait_limit_seconds}
seconds) set for Schrodinger token guard has been exceeded.", + _LE.ERROR, + ) + break + + # reload the output from "licadmin" + # at this stage, the output from licadmin is a list of strings + licadmin_output = self._get_licstat_output() + + all_pools_available = self._check_licstat_output( + licadmin_output=licadmin_output + ) + if all_pools_available: + self._logger.log( + "All token pool requirements for Schrodinger have been met - proceeding.", + _LE.DEBUG, + ) + success = True + break + else: + time.sleep(self.token_guard.wait_interval_seconds) + counter += 1 + + return success diff --git a/icolos/utils/execute_external/ligprep.py b/icolos/utils/execute_external/ligprep.py new file mode 100644 index 0000000..903d19f --- /dev/null +++ b/icolos/utils/execute_external/ligprep.py @@ -0,0 +1,47 @@ +from icolos.utils.enums.program_parameters import ( + SchrodingerExecutablesEnum, + LigprepEnum, +) +from icolos.utils.execute_external.execute import ExecutorBase + +SEE = SchrodingerExecutablesEnum() +EE = LigprepEnum() + + +class LigprepExecutor(ExecutorBase): + """For the execution of the "ligprep" binary.""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [EE.LIGPREP]: + raise ValueError( + "Parameter command must be an element of the internal Ligprep executable list." + ) + + # Note: it seems that in earlier versions, the call "ligprep" had to be changed to "$SCHRODINGER/ligprep" here. + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute( + command=EE.LIGPREP, arguments=[EE.LIGPREP_HELP], check=True + ) + + if EE.LIGPREP_HELP_IDENTIFICATION_STRING in result.stdout: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/macromodel.py b/icolos/utils/execute_external/macromodel.py new file mode 100644 index 0000000..c3ef932 --- /dev/null +++ b/icolos/utils/execute_external/macromodel.py @@ -0,0 +1,46 @@ +from icolos.utils.enums.program_parameters import ( + MacromodelEnum, + SchrodingerExecutablesEnum, +) +from icolos.utils.execute_external.execute import ExecutorBase + +SEE = SchrodingerExecutablesEnum() +EE = MacromodelEnum() + + +class MacromodelExecutor(ExecutorBase): + """For the execution of the "macromodel" binary.""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [EE.MACROMODEL]: + raise ValueError( + "Parameter command must be an element of the internal Macromodel executable list."
+ ) + + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute( + command=EE.MACROMODEL, arguments=[EE.MACROMODEL_HELP], check=True + ) + + if EE.MACROMODEL_HELP_IDENTIFICATION_STRING in result.stdout: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/omega.py b/icolos/utils/execute_external/omega.py new file mode 100644 index 0000000..3a19b5c --- /dev/null +++ b/icolos/utils/execute_external/omega.py @@ -0,0 +1,42 @@ +from icolos.utils.enums.program_parameters import OMEGAEnum +from icolos.utils.execute_external.execute import ExecutorBase + +EE = OMEGAEnum() + + +class OMEGAExecutor(ExecutorBase): + """For the execution of the "oeomega" binary.""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [EE.OMEGA]: + raise ValueError( + "Parameter command must be an element of the internal Omega executable list." + ) + + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute( + command=EE.OMEGA, arguments=[EE.OMEGA_HELP], check=True + ) + + if EE.OMEGA_HELP_IDENTIFICATION_STRING in result.stderr: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/openbabel.py b/icolos/utils/execute_external/openbabel.py new file mode 100644 index 0000000..80bcab1 --- /dev/null +++ b/icolos/utils/execute_external/openbabel.py @@ -0,0 +1,43 @@ +import os +import sys +from icolos.utils.enums.program_parameters import OpenBabelEnum +from icolos.utils.execute_external.execute import ExecutorBase + +EE = OpenBabelEnum() + + +class OpenBabelExecutor(ExecutorBase): + """For the execution of the "obabel" binary.""" + + def __init__(self):  + # in case the environment is not activated, add the path to the binary here + obabel_location = os.path.dirname(sys.executable) + super().__init__(prefix_execution=None, binary_location=obabel_location) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [EE.OBABEL]: + raise ValueError( + "Parameter command must be an element of the internal OpenBabel executable list."
+ ) + + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + # unfortunately, "obabel" does not return a meaningful return value (it always returns '1'), so instead try to parse + # the "stdout" of the standard message; note that "OpenBabel" is part of the environment and should always work + try: + result = self.execute(command=EE.OBABEL, arguments=[], check=False) + if EE.OBABEL_IDENTIFICATION_STRING in result.stdout: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/pmx.py b/icolos/utils/execute_external/pmx.py new file mode 100644 index 0000000..c2c7fdd --- /dev/null +++ b/icolos/utils/execute_external/pmx.py @@ -0,0 +1,54 @@ +from icolos.utils.enums.program_parameters import PMXEnum +from icolos.utils.execute_external.execute import ExecutorBase + +_PE = PMXEnum() + + +class PMXExecutor(ExecutorBase): + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + if command not in [ + _PE.ABFE, + _PE.ANALYSE, + _PE.ATOMMAPPING, + _PE.DOUBLEBOX, + _PE.GENLIB, + _PE.GENTOP, + _PE.LIGANDHYBRID, + _PE.MUTATE, + _PE.BOX_WATER_IONS, + _PE.PREPARE_SIMULATIONS, + _PE.PREPARE_TRANSITIONS, + _PE.RUN_ANALYSIS, + _PE.RUN_SIMULATIONS, + _PE.ASSEMBLE_SYSTEMS, + ]: + raise ValueError( + "Command must be present in internal list of PMX executables." + ) + + # the pipe_input argument deals with programs that expect interactive input + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute( + command=_PE.ANALYSE, arguments=[_PE.ANALYSE_HELP], check=False + ) + if _PE.ANALYSE_HELP_SUCCESS_STRING in result.stdout: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/prime.py b/icolos/utils/execute_external/prime.py new file mode 100644 index 0000000..3d01075 --- /dev/null +++ b/icolos/utils/execute_external/prime.py @@ -0,0 +1,43 @@ +from icolos.utils.enums.program_parameters import PrimeEnum, SchrodingerExecutablesEnum +from icolos.utils.execute_external.execute import ExecutorBase + +SEE = SchrodingerExecutablesEnum() +EE = PrimeEnum() + + +class PrimeExecutor(ExecutorBase): + """For the execution of the "prime_mmgbsa" binary.""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [EE.PRIME_MMGBSA]: + raise ValueError( + "Parameter command must be an element of the internal Prime executable list."
+ ) + + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute( + command=EE.PRIME_MMGBSA, arguments=[EE.PRIME_HELP], check=True + ) + + if EE.PRIME_HELP_IDENTIFICATION_STRING in result.stdout: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/schrodinger.py b/icolos/utils/execute_external/schrodinger.py new file mode 100644 index 0000000..e4608e4 --- /dev/null +++ b/icolos/utils/execute_external/schrodinger.py @@ -0,0 +1,58 @@ +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum +from icolos.utils.execute_external.execute import ExecutorBase + +_SEE = SchrodingerExecutablesEnum() + + +class SchrodingerExecutor(ExecutorBase): + """For the execution of Schrodinger's support entry points""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided; update the calls to "$SCHRODINGER/XYZ" + if command == _SEE.STRUCTCAT: + command = _SEE.STRUCTCAT_CALL + elif command == _SEE.SDCONVERT: + command = _SEE.SDCONVERT_CALL + elif command == _SEE.STRUCT_SPLIT: + command = _SEE.STRUCT_SPLIT_CALL + elif command == _SEE.STRUCTCONVERT: + command = _SEE.STRUCTCONVERT_CALL + elif command == _SEE.FMP_STATS: + command = _SEE.FMP_STATS_CALL + elif command == _SEE.PREPWIZARD: + command = _SEE.PREPWIZARD_CALL + elif command == _SEE.MULTISIM_EXEC: + command = _SEE.MULTISIM_EXEC + else: + raise ValueError( + "Parameter command must be an element of the internal Schrodinger entry point list." + ) + + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute( + command=_SEE.STRUCTCAT, arguments=[_SEE.STRUCTCAT_HELP], check=True + ) + + if _SEE.STRUCTCAT_HELP_IDENTIFICATION_STRING in result.stdout: + return True + return False + except Exception as e: + print(e) + return False diff --git a/icolos/utils/execute_external/sdconvert.py b/icolos/utils/execute_external/sdconvert.py new file mode 100644 index 0000000..af848f8 --- /dev/null +++ b/icolos/utils/execute_external/sdconvert.py @@ -0,0 +1,46 @@ +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum +from icolos.utils.execute_external.execute import ExecutorBase + +SEE = SchrodingerExecutablesEnum() + + +class SDConvertExecutor(ExecutorBase): + """For the execution of the "sdconvert" binary.""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [SEE.SDCONVERT]: + raise ValueError( + "Parameter command must be an element of the internal sdconvert executable list."
+ ) + + # take care of the special path to "sdconvert" + if command == SEE.SDCONVERT: + command = SEE.SDCONVERT_CALL + + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute( + command=SEE.SDCONVERT, arguments=[SEE.SDCONVERT_HELP], check=False + ) + + if SEE.SDCONVERT_HELP_IDENTIFICATION_STRING in result.stdout: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/structcat.py b/icolos/utils/execute_external/structcat.py new file mode 100644 index 0000000..fced5d0 --- /dev/null +++ b/icolos/utils/execute_external/structcat.py @@ -0,0 +1,46 @@ +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum +from icolos.utils.execute_external.execute import ExecutorBase + +SEE = SchrodingerExecutablesEnum() + + +class StructcatExecutor(ExecutorBase): + """For the execution of the "structcat" binary.""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [SEE.STRUCTCAT]: + raise ValueError( + "Parameter command must be an element of the internal structcat executable list." + ) + + # take care of the special path to "structcat" + if command == SEE.STRUCTCAT: + command = SEE.STRUCTCAT_CALL + + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute( + command=SEE.STRUCTCAT, arguments=[SEE.STRUCTCAT_HELP], check=False + ) + + if SEE.STRUCTCAT_HELP_IDENTIFICATION_STRING in result.stdout: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/turbomole.py b/icolos/utils/execute_external/turbomole.py new file mode 100644 index 0000000..5eaac64 --- /dev/null +++ b/icolos/utils/execute_external/turbomole.py @@ -0,0 +1,62 @@ +import os +import shutil +import tempfile + +from icolos.utils.enums.program_parameters import TurbomoleEnum +from icolos.utils.execute_external.execute import ExecutorBase + +EE = TurbomoleEnum() + + +class TurbomoleExecutor(ExecutorBase): + """For the execution of the "turbomole" binary.""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [ + EE.TM_COSMOPREP, + EE.TM_DEFINE, + EE.TM_RIDFT, + EE.TM_X2T, + EE.TM_T2X, + EE.CT_COSMOTHERM, + EE.TM_JOBEX, + ]: + raise ValueError( + "Parameter command must be an element of the internal Turbomole executable list." + ) + + # TM deposits the temporary files for a run in the folder specified by $TURBOTMPDIR; this setting is + # system-wide, so parallel runs would interfere; also, the folder is not removed automatically + # TODO: find a more elegant solution; is this really necessary for all binaries or only "ridft" and "jobex"?
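+ # as a stop-gap, create a fresh scratch directory for this call and export TURBOTMPDIR to point at it + # (prepended to the actual command), so that concurrent runs cannot collide; the directory is removed + # again once the call has returned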
+ tmp_dir = tempfile.mkdtemp() + command = "".join(["export TURBOTMPDIR=", tmp_dir, " && ", command]) + + result = super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + if tmp_dir is not None and os.path.isdir(tmp_dir): + shutil.rmtree(tmp_dir) + return result + + def is_available(self): + try: + result = self.execute(command=EE.TM_RIDFT, arguments=[], check=True) + + if EE.TM_RIDFT_FAIL_IDENTIFICATION_STRING in result.stderr: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/execute_external/xtb.py b/icolos/utils/execute_external/xtb.py new file mode 100644 index 0000000..5847cff --- /dev/null +++ b/icolos/utils/execute_external/xtb.py @@ -0,0 +1,40 @@ +from icolos.utils.enums.program_parameters import XTBEnum +from icolos.utils.execute_external.execute import ExecutorBase + +EE = XTBEnum() + + +class XTBExecutor(ExecutorBase): + """For the execution of the "xtb" binary.""" + + def __init__(self, prefix_execution=None, binary_location=None): + super().__init__( + prefix_execution=prefix_execution, binary_location=binary_location + ) + + def execute( + self, command: str, arguments: list, check=True, location=None, pipe_input=None + ): + # check whether a proper executable is provided + if command not in [EE.XTB]: + raise ValueError( + "Parameter command must be an element of the internal XTB executable list." + ) + + return super().execute( + command=command, + arguments=arguments, + check=check, + location=location, + pipe_input=pipe_input, + ) + + def is_available(self): + try: + result = self.execute(command=EE.XTB, arguments=[EE.XTB_HELP], check=True) + + if EE.XTB_HELP_IDENTIFICATION_STRING in result.stderr: + return True + return False + except Exception as e: + return False diff --git a/icolos/utils/general/__init__.py b/icolos/utils/general/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/utils/general/arparse_bool_extension.py b/icolos/utils/general/arparse_bool_extension.py new file mode 100644 index 0000000..96cb54d --- /dev/null +++ b/icolos/utils/general/arparse_bool_extension.py @@ -0,0 +1,14 @@ +import argparse + + +def str2bool(inp): + if isinstance(inp, bool): + return inp + if inp.lower() in ("yes", "true", "t", "y", "1"): + return True + elif inp.lower() in ("no", "false", "f", "n", "0"): + return False + else: + raise argparse.ArgumentTypeError( + "Expected castable string or boolean value as input."
+ ) diff --git a/icolos/utils/general/convenience_functions.py b/icolos/utils/general/convenience_functions.py new file mode 100644 index 0000000..8a0fbba --- /dev/null +++ b/icolos/utils/general/convenience_functions.py @@ -0,0 +1,60 @@ +from icolos.utils.general.files_paths import attach_root_path + + +# dictionary convenience functions +# --------- + + +def nested_get(dictionary: dict, keys: list, default=None): + # recursively walk through a nested dict and pull the value corresponding to the specified key(s), + # e.g. nested_get({"a": {"b": 1}}, ["a", "b"]) returns 1 + if not isinstance(keys, list): + keys = [keys] + if dictionary is None: + return default + if not keys: + return dictionary + return nested_get(dictionary.get(keys[0]), keys[1:], default) + + +def in_keys(dictionary: dict, keys: list) -> bool: + if not isinstance(keys, list): + keys = [keys] + + _dict = dictionary + for key in keys: + try: + _dict = _dict[key] + except KeyError: + return False + return True + + +# parsing "setup.py" +# --------- + + +def parse_setuppy(): + path = attach_root_path("setup.py") + parsed_dict = {} + with open(path, "r") as f: + lines = f.readlines() + for line in lines: + if "name" in line: + parsed_dict["name"] = line[line.find('"') + len('"') : line.rfind('"')] + if "version" in line: + parsed_dict["version"] = line[ + line.find('"') + len('"') : line.rfind('"') + ] + if "license" in line: + parsed_dict["license"] = line[ + line.find('"') + len('"') : line.rfind('"') + ] + if "author" in line: + parsed_dict["author"] = line[ + line.find('"') + len('"') : line.rfind('"') + ] + return parsed_dict + + +def ensure_list(inp) -> list: + return inp if isinstance(inp, list) else [inp] diff --git a/icolos/utils/general/files_paths.py b/icolos/utils/general/files_paths.py new file mode 100644 index 0000000..9c808c6 --- /dev/null +++ b/icolos/utils/general/files_paths.py @@ -0,0 +1,105 @@ +import os +import shutil +import time +import json +import tempfile +from typing import Tuple + +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.enums.general_utils_enums import CheckFileGenerationEnum + +_SE = StepBaseEnum +_FG = CheckFileGenerationEnum() + + +def check_file_availability( + path: str, interval_sec: int = 1, maximum_sec: int = 10 +) -> str: + counter = 0 + while not os.path.exists(path): + # wait for an interval + time.sleep(interval_sec) + counter += 1 + + # abort the wait once the maximum time has been exceeded + if maximum_sec is not None and (counter * interval_sec) > maximum_sec: + break + if os.path.exists(path): + if os.path.getsize(path) == 0: + return _FG.GENERATED_EMPTY + else: + return _FG.GENERATED_SUCCESS + else: + return _FG.NOT_GENERATED + + +def remove_folder(folder_path: str): + if os.path.isdir(folder_path): + shutil.rmtree(folder_path) + + +def empty_output_dir(path: str): + for root, subf, files in os.walk(path): + for file in files: + os.remove(os.path.join(root, file)) + + +def move_up_directory(path, n=1): + """Function to move up 'n' directories for a given "path".""" + # add +1 to take the file itself into account + if os.path.isfile(path): + n += 1 + for _ in range(n): + path = os.path.dirname(os.path.abspath(path)) + return path + + +def attach_root_path(path): + """Function to attach the root path of the module for a given "path".""" + ROOT_DIR = move_up_directory(os.path.abspath(__file__), n=3) + return os.path.join(ROOT_DIR, path) + + +def lines_in_file(path): + with open(path) as f: + for i, l in enumerate(f): + pass + return i + 1 + + +def dict_from_json_file(path): + with open(path, "r") as f: + return json.load(f) + + +def
any_in_file(path, strings): + if isinstance(strings, str): + strings = [strings] + if os.path.isfile(path): + with open(path, "r") as f: + file_raw = f.readlines() + for string in strings: + if any(string in line for line in file_raw): + return True + return False + else: + return False + + +def infer_input_type(path: str) -> str: + basename = os.path.basename(path) + ending = basename[-3:].upper() + if ending in [_SE.FORMAT_SDF, _SE.FORMAT_CSV, _SE.FORMAT_SMI]: + return ending + else: + raise ValueError(f"Ending {ending} not supported.") + + +def gen_tmp_file( + suffix: str = None, prefix: str = None, dir: str = None, text: bool = True +) -> Tuple[str, str]: + """Wraps tempfile.mkstemp(), but closes the file handle and returns the base name and full path instead of the handle.""" + # note that in contrast to the underlying "mkstemp" function, "text" is set to True here + fhandle, path = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir, text=text) + os.close(fhandle) + return os.path.basename(path), path diff --git a/icolos/utils/general/icolos_exceptions.py b/icolos/utils/general/icolos_exceptions.py new file mode 100644 index 0000000..c79f0e3 --- /dev/null +++ b/icolos/utils/general/icolos_exceptions.py @@ -0,0 +1,25 @@ +class ExecutionFailed(Exception): + pass + + +class StepFailed(Exception): + pass + + +class ContainerCorrupted(Exception): + pass + + +def get_exception_message(e: Exception): + if e is None: + return None + if hasattr(e, "message"): + return e.message + else: + return e + + +def get_exception_type(e: Exception) -> str: + if e is None: + return None + return type(e).__name__ diff --git a/icolos/utils/general/molecules.py b/icolos/utils/general/molecules.py new file mode 100644 index 0000000..e681273 --- /dev/null +++ b/icolos/utils/general/molecules.py @@ -0,0 +1,20 @@ +from rdkit import Chem +from rdkit.Chem import rdmolops + +from icolos.utils.enums.compound_enums import CompoundTagsEnum + + +def get_charge_for_molecule(molecule: Chem.Mol, add_as_tag=False) -> int: + _MTE = CompoundTagsEnum() + charge = rdmolops.GetFormalCharge(molecule) + if add_as_tag: + molecule.SetProp(_MTE.FORMAL_CHARGE_TAG, str(charge)) + return charge + + +def write_molecule_to_sdf(path: str, molecule: Chem.Mol): + if molecule is None or not isinstance(molecule, Chem.Mol): + raise ValueError("Function requires the input to be an RDKit molecule.") + writer = Chem.SDWriter(path) + writer.write(molecule) + writer.close() diff --git a/icolos/utils/general/notifications.py b/icolos/utils/general/notifications.py new file mode 100644 index 0000000..e69de29 diff --git a/icolos/utils/general/parallelization.py b/icolos/utils/general/parallelization.py new file mode 100644 index 0000000..6978a25 --- /dev/null +++ b/icolos/utils/general/parallelization.py @@ -0,0 +1,159 @@ +import math +import multiprocessing +from typing import List, Callable, Dict, Any +from pydantic import BaseModel +from icolos.utils.enums.parallelization import ParallelizationEnum + +_PE = ParallelizationEnum + + +class Subtask(BaseModel): + status: _PE = _PE.STATUS_READY + times_tried: int = 0 + data: Any + + def increment_tries(self): + self.times_tried += 1 + + def set_status(self, status: str): + self.status = status + + def set_status_failed(self): + self.set_status(_PE.STATUS_FAILED) + + def set_status_success(self): + self.set_status(_PE.STATUS_SUCCESS) + + +class SubtaskContainer(BaseModel): + max_tries: int + subtasks: List[Subtask] = [] + + def __init__(self, **data): + super().__init__(**data) + + def clear(self):
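+ # drop all stored subtasks; load_data() relies on this to re-populate the container from scratch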
+ self.subtasks = [] + + def load_data(self, data: List[Any]): + self.clear() + self.add_data(data=data) + + def add_data(self, data: List[Any]): + for data_element in data: + self.subtasks.append( + Subtask(status=_PE.STATUS_READY, times_tried=0, data=data_element) + ) + + def get_todo_tasks(self) -> List[Subtask]: + todo_subtasks = [] + for subtask in self.subtasks: + if ( + subtask.status == _PE.STATUS_READY + or subtask.status == _PE.STATUS_FAILED + ) and subtask.times_tried < self.max_tries: + todo_subtasks.append(subtask) + return todo_subtasks + + def get_done_tasks(self) -> List[Subtask]: + done_subtasks = [] + for subtask in self.subtasks: + if ( + subtask.status == _PE.STATUS_SUCCESS + or subtask.times_tried >= self.max_tries + ): + done_subtasks.append(subtask) + return done_subtasks + + def get_sublists( + self, partitions=None, slice_size=None, get_first_n_lists=None + ) -> List[List[Subtask]]: + if partitions is None and slice_size is None: + raise ValueError("Either specify partitions or slice size.") + + # only get tasks that are not yet completed or have some tries left + subtasks = self.get_todo_tasks() + + # decide on the chunk size, either by doing partitions or by specifying the slice size directly + sublists = [] + if partitions is not None: + chunk_size = int(math.ceil(len(subtasks) / partitions)) + else: + chunk_size = slice_size + + # wrap the tasks in lists as required + for i in range(0, len(subtasks), chunk_size): + sublist = [] + for j in range(i, min(i + chunk_size, len(subtasks))): + sublist.append(subtasks[j]) + sublists.append(sublist) + + if get_first_n_lists is not None and len(sublists) > get_first_n_lists: + return sublists[:get_first_n_lists] + else: + return sublists + + def done(self) -> bool: + for subtask in self.subtasks: + if subtask.status == _PE.STATUS_SUCCESS: + continue + if subtask.status == _PE.STATUS_READY or ( + subtask.status == _PE.STATUS_FAILED + and subtask.times_tried < self.max_tries + ): + return False + return True + + def any_failed(self) -> bool: + return any( + subtask.status == _PE.STATUS_FAILED for subtask in self.subtasks + ) + + def set_max_tries(self, max_tries: int): + self.max_tries = max_tries + + def __len__(self) -> int: + return len(self.subtasks) + + +class Parallelizer(BaseModel): + func: Callable + collect_rtn_codes: bool = False + + def __init__(self, **data): + super().__init__(**data) + + def rearrange_input(self, inp_dict: Dict[str, List]) -> List[Dict]: + return [dict(zip(inp_dict, ele)) for ele in zip(*inp_dict.values())] + + def execute_parallel(self, **kwargs): + # translate the dictionary with the lists of arguments into a list of individual dictionaries + # e.g.
{'one': [1, 2, 3], 'two': ['aaaa', 'bbb', 'cc'], 'three': [0.2, 0.2, 0.1]} ---> + # [{'one': 1, 'two': 'aaaa', 'three': 0.2}, + # {'one': 2, 'two': 'bbb', 'three': 0.2}, + # {'one': 3, 'two': 'cc', 'three': 0.1}] + list_exec = self.rearrange_input(kwargs) + + # run in parallel; wait for all subjobs to finish before proceeding + # optional mechanism for collecting the return codes from the subprocesses via a shared dictionary + if self.collect_rtn_codes: + manager = multiprocessing.Manager() + q = manager.dict() + for subprocess_args in list_exec: + subprocess_args["q"] = q + processes = [] + for subprocess_args in list_exec: + p = multiprocessing.Process(target=self.func, kwargs=subprocess_args) + processes.append(p) + p.start() + for p in processes: + p.join() + + if self.collect_rtn_codes: + return q.values() diff --git a/icolos/utils/general/print_log.py b/icolos/utils/general/print_log.py new file mode 100644 index 0000000..8324b1a --- /dev/null +++ b/icolos/utils/general/print_log.py @@ -0,0 +1,14 @@ +import os +from icolos.loggers.blank_logger import BlankLogger + + +def print_log_file(path: str, logger, level): + logger_blank = BlankLogger() + if os.path.isfile(path): + with open(path, "r") as log_file: + log_file_raw = log_file.readlines() + logger.log(f"Printing log file {path}:\n", level) + for line in log_file_raw: + logger_blank.log(line.rstrip("\n"), level) + logger_blank.log("", level) + logger.log("--- End file", level) diff --git a/icolos/utils/general/progress_bar.py b/icolos/utils/general/progress_bar.py new file mode 100644 index 0000000..1c67ed6 --- /dev/null +++ b/icolos/utils/general/progress_bar.py @@ -0,0 +1,7 @@ +def get_progress_bar_string( + done, total, prefix="", suffix="", decimals=1, length=100, fill="█" +): + percent = ("{0:."
+ str(decimals) + "f}").format(100 * (done / float(total))) + filledLength = int(length * done // total) + bar = fill * filledLength + "-" * (length - filledLength) + return f"{prefix}|{bar}| {percent}% {suffix}" diff --git a/icolos/utils/general/strings.py b/icolos/utils/general/strings.py new file mode 100644 index 0000000..77ae4ca --- /dev/null +++ b/icolos/utils/general/strings.py @@ -0,0 +1,8 @@ +def stringify(obj): + """Converts all objects in a dict to str, recursively.""" + if isinstance(obj, dict): + return {str(key): stringify(value) for key, value in obj.items()} + elif isinstance(obj, list): + return [stringify(value) for value in obj] + else: + return str(obj) diff --git a/icolos/utils/smiles.py b/icolos/utils/smiles.py new file mode 100644 index 0000000..f97d4f3 --- /dev/null +++ b/icolos/utils/smiles.py @@ -0,0 +1,145 @@ +import random + +import rdkit.Chem as rkc +from rdkit import Chem +from rdkit.Chem import AllChem +from rdkit.Chem import SaltRemover +from rdkit.Chem import rdmolops + + +def _initialiseNeutralisationReactions(): + patts = ( + # Imidazoles + ("[n+;H]", "n"), + # Amines + ("[N+;!H0]", "N"), + # Carboxylic acids and alcohols + ("[$([O-]);!$([O-][#7])]", "O"), + # Thiols + ("[S-;X1]", "S"), + # Sulfonamides + ("[$([N-;X2]S(=O)=O)]", "N"), + # Enamines + ("[$([N-;X2][C,N]=C)]", "N"), + # Tetrazoles + ("[n-]", "[nH]"), + # Sulfoxides + ("[$([S-]=O)]", "S"), + # Amides + ("[$([N-]C=O)]", "N"), + ) + return [(Chem.MolFromSmarts(x), Chem.MolFromSmiles(y, False)) for x, y in patts] + + +def _neutralise_charges(mol, reactions=None): + if reactions is None: + reactions = _initialiseNeutralisationReactions() + replaced = False + for i, (reactant, product) in enumerate(reactions): + while mol.HasSubstructMatch(reactant): + replaced = True + rms = AllChem.ReplaceSubstructs(mol, reactant, product) + mol = rms[0] + if replaced: + return mol, True + else: + return mol, False + + +def _get_largest_fragment(mol): + frags = rdmolops.GetMolFrags(mol, asMols=True, sanitizeFrags=True) + maxmol = None + for mol in frags: + if mol is None: + continue + if maxmol is None: + maxmol = mol + if maxmol.GetNumHeavyAtoms() < mol.GetNumHeavyAtoms(): + maxmol = mol + return maxmol + + +_saltremover = SaltRemover.SaltRemover() + + +def _valid_size( + mol, min_heavy_atoms, max_heavy_atoms, element_list, remove_long_side_chains +): + """Filters molecules on number of heavy atoms and atom types""" + mol = _rare_filters(mol) + if mol: + correct_size = min_heavy_atoms < mol.GetNumHeavyAtoms() < max_heavy_atoms + if not correct_size: + return + + valid_elements = all( + [atom.GetAtomicNum() in element_list for atom in mol.GetAtoms()] + ) + if not valid_elements: + return + + has_long_sidechains = False + if remove_long_side_chains: + # remove aliphatic side chains with at least 5 carbons not in a ring + sma = "[CR0]-[CR0]-[CR0]-[CR0]-[CR0]" + has_long_sidechains = mol.HasSubstructMatch(Chem.MolFromSmarts(sma)) + + return correct_size and valid_elements and not has_long_sidechains + + +def _rare_filters(mol): + if mol: + cyano_filter = "[C-]#[N+]" + oh_filter = "[OH+]" + sulfur_filter = "[SH]" + if ( + not mol.HasSubstructMatch(Chem.MolFromSmarts(cyano_filter)) + and not mol.HasSubstructMatch(Chem.MolFromSmarts(oh_filter)) + and not mol.HasSubstructMatch(Chem.MolFromSmarts(sulfur_filter)) + ): + return mol + + +def convert_to_rdkit_smiles(smiles): + return Chem.MolToSmiles( + Chem.MolFromSmiles(smiles, sanitize=False), isomericSmiles=True + ) + + +def randomize_smiles(smiles,
random_type="restricted"): + """ + Returns a random SMILES given a SMILES of a molecule. + :param smiles: The SMILES string to randomize. + :param random_type: The type (unrestricted, restricted) of randomization performed. + :return: A random SMILES string of the same molecule or None if the molecule is invalid. + """ + mol = Chem.MolFromSmiles(smiles) + if not mol: + return None + + if random_type == "unrestricted": + return rkc.MolToSmiles(mol, canonical=False, doRandom=True, isomericSmiles=True) + if random_type == "restricted": + new_atom_order = list(range(mol.GetNumHeavyAtoms())) + random.shuffle(new_atom_order) + random_mol = rkc.RenumberAtoms(mol, newOrder=new_atom_order) + return rkc.MolToSmiles(random_mol, canonical=False, isomericSmiles=True) + raise ValueError("Type '{}' is not valid".format(random_type)) + + +def to_mol(smi): + """ + Creates a Mol object from a SMILES string. + :param smi: SMILES string. + :return: A Mol object or None if it's not valid. + """ + if smi: + return rkc.MolFromSmiles(smi) + + +def to_smiles(mol): + """ + Converts a Mol object into a canonical SMILES string. + :param mol: Mol object. + :return: A SMILES string. + """ + return rkc.MolToSmiles(mol, isomericSmiles=True) diff --git a/icolos_workflow.py b/icolos_workflow.py new file mode 100644 index 0000000..d9ba5bd --- /dev/null +++ b/icolos_workflow.py @@ -0,0 +1,65 @@ +from typing import Dict +from pydantic import BaseModel +import os +import sys + +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.logging_enums import LoggingConfigEnum + +from icolos.utils.entry_point_functions.logging_helper_functions import ( + initialize_logging, +) +from icolos.utils.entry_point_functions.parsing_functions import ( + get_runtime_global_variables, + add_global, +) +from icolos.utils.general.files_paths import attach_root_path +from icolos.utils.enums.composite_agents_enums import WorkflowEnum + +_WE = WorkflowEnum() +_LE = LoggingConfigEnum() + + +class IcolosWorkflow(BaseModel): + """ + Alternative programmatic entrypoint for the Icolos workflow + """ + + config: Dict = None + workflow: WorkFlow = None + logging: str = None + global_vars: Dict = None + + def __init__(self, config, global_vars: Dict = None) -> None: + super().__init__(**config) + + self.config = self._parse_global_vars(config, global_vars) + self.workflow = WorkFlow(**config[_WE.WORKFLOW]) + # the "tutorial" logging setting logs everything to stdout as well as to the log file + self.logging = "tutorial" + + def _initialize_logging(self): + log_conf = attach_root_path(_LE.PATH_CONFIG_TUTORIAL) + logger = initialize_logging(log_conf_path=log_conf, workflow_conf=self.config) + return logger + + def _parse_global_vars(self, config, global_vars): + # substitute global vars throughout the config file, return the modified config + + if global_vars is not None: + config = add_global(config, global_vars, _WE.GLOBAL_VARIABLES) + config = add_global( + config, + get_runtime_global_variables( + os.path.join(os.getcwd(), "config.json"), os.path.realpath(__file__) + ), + _WE.GLOBAL_VARIABLES, + ) + return config + + def execute(self): + self._initialize_logging() + self.workflow.initialize() + self.workflow.execute() + + sys.exit(0) diff --git a/integration_tests.py b/integration_tests.py new file mode 100644 index 0000000..a57f659 --- /dev/null +++ b/integration_tests.py @@ -0,0 +1,5 @@ +from tests.integration_tests import * + + +if __name__ == "__main__": + unittest.main() diff --git a/licences/espsim_licence.txt b/licences/espsim_licence.txt new file mode 100644 index 0000000..a5c8ed3 ---
/dev/null +++ b/licences/espsim_licence.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Esther Heid + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..07de284 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/sdf2smi.py b/sdf2smi.py new file mode 100644 index 0000000..dbefbbc --- /dev/null +++ b/sdf2smi.py @@ -0,0 +1,99 @@ +import os +import pandas as pd +import argparse + +import rdkit.Chem as Chem + +from icolos.core.step_utils.rdkit_utils import to_smiles + + +if __name__ == "__main__": + + # get the input parameters and parse them + parser = argparse.ArgumentParser( + description="Implements a simple translator that takes an SDF file and writes out SMILES." + ) + parser.add_argument("-sdf", type=str, default=None, help="A path to an SDF file.") + parser.add_argument( + "-smi", + type=str, + default=None, + required=False, + help="A path to an output text file.", + ) + parser.add_argument( + "-csv", + type=str, + default=None, + required=False, + help="A path to an output CSV file.", + ) + parser.add_argument( + "-keep_stereo", + action="store_true", + help="If set, exported SMILES contain stereo-information.", + ) + parser.add_argument( + "-tags2columns", + type=str, + nargs="+", + default=None, + required=False, + help="A list of strings for which tags should be transformed into columns.", + ) + args = parser.parse_args() + + if args.sdf is None or not os.path.isfile(args.sdf): + raise Exception( + 'Parameter "-sdf" must be a relative or absolute path to a valid SDF file.' + ) + if args.smi is None and args.csv is None: + raise Exception( + 'At least one of the "-smi" or "-csv" output paths must be set.'
+ ) + + molecules = [] + for mol in Chem.SDMolSupplier(args.sdf): + if mol is None: + continue + molecules.append(mol) + + # write out + # --------- + if args.smi is not None: + with open(args.smi, "w") as smi_file: + for mol in molecules: + smi_file.write(to_smiles(mol, isomericSmiles=args.keep_stereo) + "\n") + + if args.csv is not None: + data_buffer = [] + columns = ["Name", "SMILES"] + tags2columns = [] + if args.tags2columns is not None: + tags2columns = args.tags2columns + columns = columns + tags2columns + for mol in molecules: + # add default columns for this row + row = [ + mol.GetProp("_Name"), + to_smiles(mol, isomericSmiles=args.keep_stereo), + ] + + # add selected columns for this row (if specified) + for tag in tags2columns: + try: + row.append(mol.GetProp(tag)) + except KeyError: + row.append(None) + + data_buffer.append(row) + df_writeout = pd.DataFrame(data_buffer, columns=columns) + df_writeout.to_csv( + path_or_buf=args.csv, + sep=",", + na_rep="", + header=True, + index=False, + mode="w", + quoting=None, + ) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..3a54d9c --- /dev/null +++ b/setup.py @@ -0,0 +1,17 @@ +from setuptools import setup, find_packages + +setup( + name="icolos", + maintainer="Christian Margreitter, Harry Moore", + version="1.4.0", + packages=find_packages("."), + include_package_data=True, + package_dir={"config": "icolos/config"}, + package_data={"icolos": ["config/logging/*.json"]}, + description="Icolos Workflow Manager", + entry_points=""" + [console_scripts] + icolos=icolos.scripts.cli:entry_point + """, + python_requires=">=3.8", +) diff --git a/tests/CREST/__init__.py b/tests/CREST/__init__.py new file mode 100644 index 0000000..adaeac4 --- /dev/null +++ b/tests/CREST/__init__.py @@ -0,0 +1 @@ +from tests.CREST.test_CREST_confgen import * diff --git a/tests/CREST/test_CREST_confgen.py b/tests/CREST/test_CREST_confgen.py new file mode 100644 index 0000000..c8046d5 --- /dev/null +++ b/tests/CREST/test_CREST_confgen.py @@ -0,0 +1,115 @@ +import unittest +import os + +from icolos.core.workflow_steps.confgen.crest import StepCREST + +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.enums.program_parameters import CrestEnum + +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + MAIN_CONFIG, + export_unit_test_env_vars, + get_mol_as_Compound, +) +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_CE = CrestEnum() + + +class Test_CREST_confgen(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/CREST") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + self._paracetamol_molecule = get_mol_as_Compound( + PATHS_EXAMPLEDATA.PARACETAMOL_PATH + ) + self._aspirin_molecule = get_mol_as_Compound(PATHS_EXAMPLEDATA.ASPIRIN_PATH) + + @classmethod + def tearDownClass(cls): + pass + + def test_coordinate_generation_neutral(self): + step_conf = { + _SBE.STEPID: "01_conf_gen_crest", + _SBE.STEP_TYPE: _SBE.STEP_CREST, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: None, + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["CREST_BINARY_LOCATION"], + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 7}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-niceprint"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.CREST_OPT: "normal", + _CE.CREST_G: "h2o", + _CE.CREST_RTHR: 0.5, + _CE.CREST_ETHR: 0.25, + _CE.CREST_EWIN: 8.0, + 
_CE.CREST_PTHR: 0.4, + _CE.CREST_BTHR: 0.02, + }, + } + }, + } + crest_step = StepCREST(**step_conf) + crest_step.data.compounds = [self._paracetamol_molecule] + crest_step.execute() + + # check number of conformers returned (only one Compound with only one Enumeration) + self.assertGreaterEqual(len(crest_step.get_compounds()[0][0]), 18) + + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join(self._test_dir, "CREST_conformers_paracetamol.sdf") + crest_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 35000) + + def test_coordinate_generation_charged(self): + step_conf = { + _SBE.STEPID: "01_conf_gen_crest", + _SBE.STEP_TYPE: _SBE.STEP_CREST, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: None, + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["CREST_BINARY_LOCATION"], + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 7}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-niceprint"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.CREST_OPT: "normal", + _CE.CREST_G: "h2o", + _CE.CREST_RTHR: 0.5, + _CE.CREST_ETHR: 0.25, + _CE.CREST_EWIN: 8.0, + _CE.CREST_PTHR: 0.4, + _CE.CREST_BTHR: 0.02, + }, + } + }, + } + + # check number of conformers returned + crest_step = StepCREST(**step_conf) + crest_step.data.compounds = [self._aspirin_molecule] + crest_step.execute() + + # check number of conformers returned (only one Compound with only one Enumeration) + self.assertGreaterEqual(len(crest_step.get_compounds()[0][0]), 2) + + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join(self._test_dir, "CREST_conformers_aspirin.sdf") + crest_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + print(stat_inf.st_size) + self.assertGreater(stat_inf.st_size, 3200) diff --git a/tests/OMEGA/__init__.py b/tests/OMEGA/__init__.py new file mode 100644 index 0000000..a37a394 --- /dev/null +++ b/tests/OMEGA/__init__.py @@ -0,0 +1 @@ +from tests.OMEGA.test_OMEGA_confgen import Test_OMEGA_confgen diff --git a/tests/OMEGA/test_OMEGA_confgen.py b/tests/OMEGA/test_OMEGA_confgen.py new file mode 100644 index 0000000..3fdaf15 --- /dev/null +++ b/tests/OMEGA/test_OMEGA_confgen.py @@ -0,0 +1,124 @@ +import unittest +import os + +from icolos.core.workflow_steps.confgen.omega import StepOmega + +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.enums.program_parameters import OMEGAEnum + +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + export_unit_test_env_vars, + get_mol_as_Compound, +) +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_CE = OMEGAEnum() + + +class Test_OMEGA_confgen(unittest.TestCase): + @classmethod + def setUpClass(cls): + + cls._test_dir = attach_root_path("tests/junk/OMEGA") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + self._paracetamol_molecule = get_mol_as_Compound( + PATHS_EXAMPLEDATA.PARACETAMOL_PATH + ) + self._aspirin_molecule = get_mol_as_Compound(PATHS_EXAMPLEDATA.ASPIRIN_PATH) + + @classmethod + def tearDownClass(cls): + pass + + def test_coordinate_generation_neutral(self): + step_conf = { + _SBE.STEPID: "01_conf_gen_omega", + _SBE.STEP_TYPE: _SBE.STEP_OMEGA, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load omega"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.CLASSIC_MAXCONFS: 50, + _CE.CLASSIC_RMS: 
0.05, + }, + } + }, + } + omega_step = StepOmega(**step_conf) + omega_step.data.compounds = [self._paracetamol_molecule] + omega_step.execute() + + # check number of conformers returned (only one Compound with only one Enumeration) + self.assertEqual(len(omega_step.get_compounds()[0][0]), 2) + + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join(self._test_dir, "OMEGA_conformers_paracetamol.sdf") + omega_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 4274) + + def test_coordinate_generation_neutral_high_RMS(self): + step_conf = { + _SBE.STEPID: "01_conf_gen_omega", + _SBE.STEP_TYPE: _SBE.STEP_OMEGA, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load omega"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.CLASSIC_MAXCONFS: 10, + _CE.CLASSIC_RMS: 0.7, + }, + } + }, + } + omega_step = StepOmega(**step_conf) + omega_step.data.compounds = [self._paracetamol_molecule] + omega_step.execute() + + # check number of conformers returned (only one Compound with only one Enumeration) + self.assertEqual(len(omega_step.get_compounds()[0][0]), 1) + + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join( + self._test_dir, "OMEGA_conformers_paracetamol_highRMS.sdf" + ) + omega_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 2137) + + def test_coordinate_generation_charged(self): + step_conf = { + _SBE.STEPID: "01_conf_gen_omega", + _SBE.STEP_TYPE: _SBE.STEP_OMEGA, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load omega"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.CLASSIC_MAXCONFS: 10, + _CE.CLASSIC_RMS: 0.0, + }, + } + }, + } + omega_step = StepOmega(**step_conf) + omega_step.data.compounds = [self._aspirin_molecule] + omega_step.execute() + + # check number of conformers returned (only one Compound with only one Enumeration) + self.assertEqual(len(omega_step.get_compounds()[0][0]), 2) + + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join(self._test_dir, "OMEGA_conformers_aspirin.sdf") + omega_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 3480) diff --git a/tests/Turbomole/__init__.py b/tests/Turbomole/__init__.py new file mode 100644 index 0000000..29b5ecd --- /dev/null +++ b/tests/Turbomole/__init__.py @@ -0,0 +1 @@ +from tests.Turbomole.test_Turbomole import * diff --git a/tests/Turbomole/test_Turbomole.py b/tests/Turbomole/test_Turbomole.py new file mode 100644 index 0000000..a71cc90 --- /dev/null +++ b/tests/Turbomole/test_Turbomole.py @@ -0,0 +1,253 @@ +import unittest +import os + +from icolos.core.workflow_steps.calculation.turbomole import StepTurbomole + +from icolos.utils.enums.step_enums import StepBaseEnum, StepTurbomoleEnum +from icolos.utils.enums.program_parameters import TurbomoleEnum + +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + export_unit_test_env_vars, + get_mol_as_Compound, + get_mol_as_Conformer, + MAIN_CONFIG, +) +from icolos.utils.enums.compound_enums import ConformerContainerEnum +from icolos.utils.general.files_paths import attach_root_path +import time + +_SBE = StepBaseEnum +_TE = TurbomoleEnum() +_COE = ConformerContainerEnum() +_STE = StepTurbomoleEnum() + + +class Test_Turbomole(unittest.TestCase): + @classmethod + def 
setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/Turbomole") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + # initialize a Compound with 1 Enumeration and 2 Conformers (done by OMEGA) + _paracetamol_molecule = get_mol_as_Compound(PATHS_EXAMPLEDATA.PARACETAMOL_PATH) + confs = get_mol_as_Conformer(PATHS_EXAMPLEDATA.PARACETAMOL_MULTIPLE_CONF) + _paracetamol_molecule[0].add_conformers(confs, auto_update=True) + self._paracetamol_molecule = _paracetamol_molecule + + @classmethod + def tearDownClass(cls): + pass + + def test_Turbomole_run_ridft_single_core(self): + step_conf = { + _SBE.STEPID: "01_turbomole", + _SBE.STEP_TYPE: _SBE.STEP_TURBOMOLE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load turbomole/73", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _TE.TM_CONFIG_DIR: MAIN_CONFIG["TURBOMOLE_CONFIG"], + _TE.TM_CONFIG_BASENAME: "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", + _TE.TM_CONFIG_COSMO: os.path.join( + MAIN_CONFIG["TURBOMOLE_CONFIG"], "cosmoprep_eps80.tm" + ), + _STE.EXECUTION_MODE: _TE.TM_RIDFT, + }, + }, + } + + os.environ["PARA_ARCH"] = "MPI" + os.environ["PARNODES"] = "4" + tm_step = StepTurbomole(**step_conf) + tm_step.data.compounds = [self._paracetamol_molecule] + + # conformer coordinates should not be touched by the execution + self.assertListEqual( + list( + tm_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8785, 0.6004, -0.2173], + ) + tm_step.execute() + self.assertListEqual( + list( + tm_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8785, 0.6004, -0.2173], + ) + cosmofile = tm_step.get_compounds()[0][0][0].get_extra_data()[ + _COE.EXTRA_DATA_COSMOFILE + ] + coordfile = tm_step.get_compounds()[0][0][0].get_extra_data()[ + _COE.EXTRA_DATA_COORDFILE + ] + self.assertTrue("basgrd points= 9806" in cosmofile[5]) + + # check write-out + out_path = os.path.join(self._test_dir, "paracetamol_conf1_CosmoFile") + with open(out_path, "w") as f: + f.writelines(cosmofile) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 132018) + + out_path = os.path.join(self._test_dir, "paracetamol_conf1_CoordFile") + with open(out_path, "w") as f: + f.writelines(coordfile) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 13544) + + def test_Turbomole_run_ridft_dual_core(self): + step_conf = { + _SBE.STEPID: "01_turbomole", + _SBE.STEP_TYPE: _SBE.STEP_TURBOMOLE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load turbomole/73", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 2}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _TE.TM_CONFIG_DIR: MAIN_CONFIG["TURBOMOLE_CONFIG"], + _TE.TM_CONFIG_BASENAME: "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", + _TE.TM_CONFIG_COSMO: os.path.join( + MAIN_CONFIG["TURBOMOLE_CONFIG"], "cosmoprep_eps80.tm" + ), + _STE.EXECUTION_MODE: _TE.TM_RIDFT, + }, + }, + } + os.environ["PARA_ARCH"] = "MPI" + os.environ["PARNODES"] = "4" + + tm_step = StepTurbomole(**step_conf) + tm_step.data.compounds = [self._paracetamol_molecule] + + # conformer coordinates should not be touched by the execution + self.assertListEqual( + 
list( + tm_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8785, 0.6004, -0.2173], + ) + t1 = time.time() + tm_step.execute() + t2 = time.time() + + self.assertLess(t2 - t1, 50) + self.assertListEqual( + list( + tm_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8785, 0.6004, -0.2173], + ) + cosmofile = tm_step.get_compounds()[0][0][0].get_extra_data()[ + _COE.EXTRA_DATA_COSMOFILE + ] + coordfile = tm_step.get_compounds()[0][0][0].get_extra_data()[ + _COE.EXTRA_DATA_COORDFILE + ] + self.assertTrue("basgrd points= 9806" in cosmofile[5]) + + # check write-out + out_path = os.path.join(self._test_dir, "paracetamol_conf1_CosmoFile") + with open(out_path, "w") as f: + f.writelines(cosmofile) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 132018) + + out_path = os.path.join(self._test_dir, "paracetamole_conf1_CoordFile") + with open(out_path, "w") as f: + f.writelines(coordfile) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 13544) + + def test_Turbomole_run_jobex(self): + step_conf = { + _SBE.STEPID: "01_turbomole", + _SBE.STEP_TYPE: _SBE.STEP_TURBOMOLE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load turbomole/73", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 2}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-ri"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _TE.TM_JOBEX_C: 70, + _TE.TM_JOBEX_GCART: 3, + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + _TE.TM_CONFIG_DIR: MAIN_CONFIG["TURBOMOLE_CONFIG"], + _TE.TM_CONFIG_BASENAME: "b97-3c-ri-d3-def2-mtzvp-int-charge", + _TE.TM_CONFIG_COSMO: os.path.join( + MAIN_CONFIG["TURBOMOLE_CONFIG"], "cosmoprep_eps80.tm" + ), + _STE.EXECUTION_MODE: _TE.TM_JOBEX, + }, + }, + } + + os.environ["PARA_ARCH"] = "MPI" + os.environ["PARNODES"] = "3" + tm_step = StepTurbomole(**step_conf) + tm_step.data.compounds = [self._paracetamol_molecule] + + # conformer coordinates should be touched by the execution (this is geo opt) + self.assertListEqual( + list( + tm_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8785, 0.6004, -0.2173], + ) + tm_step.execute() + self.assertListEqual( + list( + tm_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.1919, -3.3229, 0.3518], + ) + cosmofile = tm_step.get_compounds()[0][0][0].get_extra_data()[ + _COE.EXTRA_DATA_COSMOFILE + ] + + self.assertTrue("nspa= 92" in cosmofile[5]) + + # check write-out + out_path = os.path.join(self._test_dir, "paracetamol_conf1_CosmoFile_jobex") + with open(out_path, "w") as f: + f.writelines(cosmofile) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 115864) diff --git a/tests/XTB/__init__.py b/tests/XTB/__init__.py new file mode 100644 index 0000000..8cf1a75 --- /dev/null +++ b/tests/XTB/__init__.py @@ -0,0 +1 @@ +from tests.XTB.test_XTB_confgen import Test_XTB_confgen diff --git a/tests/XTB/test_XTB_confgen.py b/tests/XTB/test_XTB_confgen.py new file mode 100644 index 0000000..7166b01 --- /dev/null +++ b/tests/XTB/test_XTB_confgen.py @@ -0,0 +1,207 @@ +import unittest +import os + +from icolos.core.workflow_steps.confgen.xtb import StepXTB + +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.enums.program_parameters import XTBEnum + +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + MAIN_CONFIG, + export_unit_test_env_vars, + get_mol_as_Compound, + 
get_ligands_as_compounds_with_conformers, + get_mol_as_Conformer, +) +from icolos.utils.general.files_paths import attach_root_path +import time + +_SBE = StepBaseEnum +_CE = XTBEnum() + + +class Test_XTB_confgen(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/XTB") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + self._paracetamol_molecule = get_mol_as_Compound( + PATHS_EXAMPLEDATA.PARACETAMOL_PATH + ) + self._aspirin_molecule = get_mol_as_Compound(PATHS_EXAMPLEDATA.ASPIRIN_PATH) + self._medium_molecules = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.MEDIUM_MOLECULES_SDF_PATH + ) + + @classmethod + def tearDownClass(cls): + pass + + def test_coordinate_generation(self): + step_conf = { + _SBE.STEPID: "01_conf_genXTB", + _SBE.STEP_TYPE: _SBE.STEP_XTB, + _SBE.EXEC: { + _SBE.EXEC_BINARYLOCATION: attach_root_path( + os.path.join(MAIN_CONFIG["XTBHOME"]) + ), + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 7}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.XTB_OPT: "vtight", + _CE.XTB_GBSA: "h2o", + }, + } + }, + } + xtb_step = StepXTB(**step_conf) + xtb_step.data.compounds = [self._paracetamol_molecule] + confs = get_mol_as_Conformer( + attach_root_path(PATHS_EXAMPLEDATA.PARACETAMOL_MULTIPLE_CONF) + ) + xtb_step.data.compounds[0][0].add_conformers(confs, auto_update=True) + self.assertListEqual( + list( + xtb_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8785, 0.6004, -0.2173], + ) + xtb_step.execute() + self.assertListEqual( + list( + xtb_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8852, 0.6805, -0.1339], + ) + + # check number of conformers returned (only one Compound with only one Enumeration) + self.assertEqual(len(xtb_step.get_compounds()[0][0]), 2) + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join( + self._test_dir, "XTB_conformers_from_OMEGA_paracetamol.sdf" + ) + xtb_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 4414) + + def test_single_core_execution(self): + step_conf = { + _SBE.STEPID: "01_conf_genXTB", + _SBE.STEP_TYPE: _SBE.STEP_XTB, + _SBE.EXEC: { + _SBE.EXEC_BINARYLOCATION: attach_root_path( + os.path.join(MAIN_CONFIG["XTBHOME"]) + ), + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.XTB_OPT: "vtight", + _CE.XTB_GBSA: "h2o", + }, + } + }, + } + xtb_step = StepXTB(**step_conf) + xtb_step.data.compounds = self._medium_molecules + self.assertListEqual( + list( + xtb_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.5065, -0.0698, 5.1132], + ) + xtb_step.execute() + self.assertListEqual( + list( + xtb_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.0964, -0.6968, 4.0397], + ) + + # check number of conformers returned (only one Compound with only one Enumeration) + self.assertEqual(len(xtb_step.get_compounds()[0][0]), 1) + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join( + self._test_dir, "XTB_conformers_from_OMEGA_paracetamol.sdf" + ) + 
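+        # note: the file name above is reused from the paracetamol test; the conformers written here come from the "medium molecules" set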
xtb_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 6874) + + def test_parallel_execution(self): + step_conf = { + _SBE.STEPID: "01_conf_genXTB", + _SBE.STEP_TYPE: _SBE.STEP_XTB, + _SBE.EXEC: { + _SBE.EXEC_BINARYLOCATION: attach_root_path( + os.path.join(MAIN_CONFIG["XTBHOME"]) + ), + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 8}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.XTB_OPT: "vtight", + _CE.XTB_GBSA: "h2o", + }, + } + }, + } + xtb_step = StepXTB(**step_conf) + xtb_step.data.compounds = self._medium_molecules + self.assertListEqual( + list( + xtb_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.5065, -0.0698, 5.1132], + ) + t1 = time.time() + xtb_step.execute() + t2 = time.time() + self.assertLess(t2 - t1, 4) + self.assertListEqual( + list( + xtb_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.0964, -0.6968, 4.0397], + ) + + # check number of conformers returned (only one Compound with only one Enumeration) + self.assertEqual(len(xtb_step.get_compounds()[0][0]), 1) + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join( + self._test_dir, "XTB_conformers_from_OMEGA_paracetamol.sdf" + ) + xtb_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 6874) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/autodockvina/__init__.py b/tests/autodockvina/__init__.py new file mode 100644 index 0000000..1637d7b --- /dev/null +++ b/tests/autodockvina/__init__.py @@ -0,0 +1,2 @@ +from tests.autodockvina.test_adv_docking import * +from tests.autodockvina.test_adv_target_prep import * diff --git a/tests/autodockvina/test_adv_docking.py b/tests/autodockvina/test_adv_docking.py new file mode 100644 index 0000000..662f7e7 --- /dev/null +++ b/tests/autodockvina/test_adv_docking.py @@ -0,0 +1,95 @@ +import os +import unittest + +from icolos.core.workflow_steps.autodockvina.docking import StepAutoDockVina + +from icolos.utils.enums.step_enums import StepBaseEnum, StepAutoDockVinaEnum +from icolos.utils.enums.program_parameters import AutoDockVinaEnum + +from tests.tests_paths import PATHS_1UYD, get_1UYD_ligands_as_Compounds +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SAE = StepAutoDockVinaEnum() +_EE = AutoDockVinaEnum() + + +class Test_ADV_docking(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/ADV") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + self._1UYD_compounds = get_1UYD_ligands_as_Compounds( + abs_path=PATHS_1UYD.LIGANDS + ) + self.receptor_path = PATHS_1UYD.PDBQT_PATH + + def test_ADV_run(self): + step_conf = { + _SBE.STEPID: "01_ADV", + _SBE.STEP_TYPE: _SBE.STEP_AUTODOCKVINA_DOCKING, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load AutoDock_Vina", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 4}, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SAE.CONFIGURATION: { + _SAE.ADV_SEARCH_SPACE: { + _SAE.ADV_SEARCH_SPACE_CENTER_X: 3.3, + 
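+                        # note: only SIZE_Y and SIZE_Z are given below; ADV_SEARCH_SPACE_SIZE_X appears to be missing, so the X extent presumably falls back to a default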
_SAE.ADV_SEARCH_SPACE_CENTER_Y: 11.5, + _SAE.ADV_SEARCH_SPACE_CENTER_Z: 24.8, + _SAE.ADV_SEARCH_SPACE_SIZE_Y: 10, + _SAE.ADV_SEARCH_SPACE_SIZE_Z: 10, + }, + _SAE.NUMBER_POSES: 2, + _SAE.ADV_RECEPTOR_PATH: self.receptor_path, + } + }, + }, + } + + adv_step = StepAutoDockVina(**step_conf) + adv_step.data.compounds = self._1UYD_compounds + + adv_step.execute() + self.assertEqual(len(adv_step.get_compounds()), 15) + self.assertEqual(len(adv_step.get_compounds()[0][0].get_conformers()), 2) + self.assertListEqual( + list( + adv_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [1.953, 10.113, 25.16], + ) + self.assertListEqual( + list( + adv_step.get_compounds()[14][0][1] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [3.682, 15.421, 26.244], + ) + self.assertEqual( + adv_step.get_compounds()[0][0][0] + .get_molecule() + .GetProp(_SBE.ANNOTATION_TAG_DOCKING_SCORE), + "-9.1", + ) + + # check SDF write-out + out_path = os.path.join(self._test_dir, "adv_docked.sdf") + adv_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 105000) diff --git a/tests/autodockvina/test_adv_target_prep.py b/tests/autodockvina/test_adv_target_prep.py new file mode 100644 index 0000000..b8e1c9a --- /dev/null +++ b/tests/autodockvina/test_adv_target_prep.py @@ -0,0 +1,90 @@ +import unittest +import os + +from icolos.core.workflow_steps.autodockvina.target_preparation import ( + StepAutoDockVinaTargetPreparation, +) +from icolos.utils.enums.step_enums import ( + StepBaseEnum, + StepAutoDockVinaTargetPreparationEnum, +) +from icolos.utils.general.files_paths import attach_root_path +from tests.tests_paths import PATHS_1UYD + +_SBE = StepBaseEnum +_SAE = StepAutoDockVinaTargetPreparationEnum() + + +class Test_ADV_target_preparation(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/ADV_target_prep") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + cls.receptor_output_path = os.path.join(cls._test_dir, "ADV_receptor.pdbqt") + + def setUp(self): + self.receptor_input_path = PATHS_1UYD.PDB_PATH + self.reference_ligand_sdf_path = PATHS_1UYD.NATIVE_LIGAND_SDF + self.reference_ligand_pdb_path = PATHS_1UYD.NATIVE_LIGAND_PDB + + @classmethod + def tearDownClass(cls): + pass + + def test_extract_box(self): + step_conf = { + _SBE.STEPID: "01_ADV", + _SBE.STEP_TYPE: _SBE.STEP_AUTODOCKVINA_TARGET_PREPARATION, + _SBE.EXEC: {}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SAE.INPUT_RECEPTOR_PDB: self.receptor_input_path, + _SAE.OUTPUT_RECEPTOR_PDBQT: self.receptor_output_path, + _SAE.EXTRACT_BOX: { + _SAE.EXTRACT_BOX_REFERENCE_LIGAND_PATH: self.reference_ligand_sdf_path, + _SAE.EXTRACT_BOX_REFERENCE_LIGAND_FORMAT: _SAE.EXTRACT_BOX_REFERENCE_LIGAND_FORMAT_SDF, + }, + }, + }, + } + + adv_tp_step = StepAutoDockVinaTargetPreparation(**step_conf) + x_coords, y_coords, z_coords = adv_tp_step._extract_box() + + self.assertEqual(len(x_coords), 28) + self.assertListEqual([4.403, 5.122, 5.091], x_coords[:3]) + self.assertListEqual([15.528, 15.084, 13.786], y_coords[:3]) + self.assertListEqual([26.579, 25.453, 24.846], z_coords[:3]) + + def test_target_preparation(self): + step_conf = { + _SBE.STEPID: "01_ADV", + _SBE.STEP_TYPE: _SBE.STEP_AUTODOCKVINA_TARGET_PREPARATION, + _SBE.EXEC: {}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + 
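+                    # no flags or parameters are required; receptor paths and reference-ligand box extraction are configured under SETTINGS_ADDITIONAL below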
_SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SAE.INPUT_RECEPTOR_PDB: self.receptor_input_path, + _SAE.OUTPUT_RECEPTOR_PDBQT: self.receptor_output_path, + _SAE.EXTRACT_BOX: { + _SAE.EXTRACT_BOX_REFERENCE_LIGAND_PATH: self.reference_ligand_pdb_path, + _SAE.EXTRACT_BOX_REFERENCE_LIGAND_FORMAT: _SAE.EXTRACT_BOX_REFERENCE_LIGAND_FORMAT_PDB, + }, + }, + }, + } + + adv_tp_step = StepAutoDockVinaTargetPreparation(**step_conf) + adv_tp_step.execute() + + # check SDF write-out + stat_inf = os.stat(self.receptor_output_path) + self.assertGreater(stat_inf.st_size, 290000) diff --git a/tests/boltzmann_weighting/__init__.py b/tests/boltzmann_weighting/__init__.py new file mode 100644 index 0000000..6457ffb --- /dev/null +++ b/tests/boltzmann_weighting/__init__.py @@ -0,0 +1 @@ +from tests.boltzmann_weighting.test_boltzmann_weighting import Test_BoltzmannWeighting diff --git a/tests/boltzmann_weighting/test_boltzmann_weighting.py b/tests/boltzmann_weighting/test_boltzmann_weighting.py new file mode 100644 index 0000000..846d7d3 --- /dev/null +++ b/tests/boltzmann_weighting/test_boltzmann_weighting.py @@ -0,0 +1,130 @@ +import unittest +import os + +from icolos.core.containers.compound import Compound, Enumeration +from icolos.core.workflow_steps.calculation.boltzmann_weighting import ( + StepBoltzmannWeighting, +) + +from icolos.utils.enums.step_enums import StepBaseEnum, StepBoltzmannWeightingEnum + +from tests.tests_paths import PATHS_EXAMPLEDATA, get_mol_as_Conformer +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SBWE = StepBoltzmannWeightingEnum() + + +class Test_BoltzmannWeighting(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/BoltzmannWeighting") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + # this file has the necessary properties for the different solvents annotated as tags + self._example_mol_path = ( + PATHS_EXAMPLEDATA.EPSA_BOLTZMANN_WEIGHTING_EXAMPLE_MOLECULE + ) + + @classmethod + def tearDownClass(cls): + pass + + def test_predict_ePSA_with_descriptors(self): + step_conf = { + _SBE.STEPID: "01_boltzmann_weighting", + _SBE.STEP_TYPE: _SBE.STEP_BOLTZMANN_WEIGHTING, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _SBWE.PROPERTIES: [ + { + _SBWE.PROPERTIES_INPUT: "G_h2o", + _SBWE.PROPERTIES_OUTPUT: "boltzfactor_wat", + }, + { + _SBWE.PROPERTIES_INPUT: "G_meoh", + _SBWE.PROPERTIES_OUTPUT: "boltzfactor_meoh", + }, + { + _SBWE.PROPERTIES_INPUT: "G_octanol", + _SBWE.PROPERTIES_OUTPUT: "boltzfactor_octanol", + }, + { + _SBWE.PROPERTIES_INPUT: "G_dmso", + _SBWE.PROPERTIES_OUTPUT: "boltzfactor_dmso", + }, + { + _SBWE.PROPERTIES_INPUT: "G_cychex", + _SBWE.PROPERTIES_OUTPUT: "boltzfactor_cychex", + }, + { + _SBWE.PROPERTIES_INPUT: "G_chcl3", + _SBWE.PROPERTIES_OUTPUT: "boltzfactor_chcl3", + }, + { + _SBWE.PROPERTIES_INPUT: "G_acn", + _SBWE.PROPERTIES_OUTPUT: "boltzfactor_acn", + }, + { + _SBWE.PROPERTIES_INPUT: "G_thf", + _SBWE.PROPERTIES_OUTPUT: "boltzfactor_thf", + }, + ], + _SBWE.WEIGHT: { + _SBWE.WEIGHT_INPUT: [ + "area", + "HB_acc", + "volume", + "HB_don", + "sigma2", + "Gsolv_meoh", + ], + _SBWE.WEIGHT_OUTPUT_PREFIX: "bf_weighted", + _SBWE.WEIGHT_PROPERTIES: [ + "boltzfactor_dmso", + "boltzfactor_wat", + "boltzfactor_meoh", + "boltzfactor_cychex", + ], + }, + } + } + }, + } + bweigh_step = StepBoltzmannWeighting(**step_conf) + 
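+        # build a minimal Compound -> Enumeration -> Conformer hierarchy by hand and attach the tag-annotated conformer loaded in setUp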
bweigh_step.get_compounds().append(Compound()) + bweigh_step.get_compounds()[0].add_enumeration(Enumeration(), auto_update=True) + conformer = get_mol_as_Conformer(self._example_mol_path) + bweigh_step.data.compounds[0][0].add_conformers(conformer, auto_update=True) + bweigh_step.execute() + + self.assertEqual(len(bweigh_step.get_compounds()), 1) + self.assertEqual(len(bweigh_step.get_compounds()[0]), 1) + self.assertEqual(len(bweigh_step.get_compounds()[0][0]), 1) + + self.assertListEqual( + list( + bweigh_step.get_compounds()[0][0] + .get_conformers()[0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [1.9524, -0.9976, -1.5113], + ) + self.assertEqual( + 19, + bweigh_step.get_compounds()[0][0] + .get_conformers()[0] + .get_molecule() + .GetNumAtoms(), + ) + + # check SDF write-out (including Boltzmann-weighted properties as tags) + out_path = os.path.join(self._test_dir, "boltzmann_weighted_annotated.sdf") + bweigh_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 4419) diff --git a/tests/cavity_explorer/__init__.py b/tests/cavity_explorer/__init__.py new file mode 100644 index 0000000..6b32d37 --- /dev/null +++ b/tests/cavity_explorer/__init__.py @@ -0,0 +1 @@ +from tests.cavity_explorer.test_md_pocket import * diff --git a/tests/cavity_explorer/test_md_pocket.py b/tests/cavity_explorer/test_md_pocket.py new file mode 100644 index 0000000..317035d --- /dev/null +++ b/tests/cavity_explorer/test_md_pocket.py @@ -0,0 +1,115 @@ +from icolos.core.containers.generic import GenericData +import unittest +from icolos.utils.enums.step_enums import StepCavExploreEnum, StepBaseEnum +from tests.tests_paths import PATHS_EXAMPLEDATA +from icolos.core.workflow_steps.cavity_explorer.mdpocket import StepMDpocket +from icolos.utils.general.files_paths import attach_root_path + +import os + +_SBE = StepBaseEnum +_SFP = StepCavExploreEnum() + + +class Test_MDPocket(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/cavity_explorer") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + # read in the example files here + self.cav_folder = PATHS_EXAMPLEDATA.CAVITY_TRJ_FOLDER + with open(PATHS_EXAMPLEDATA.CAVITY_DTR_FILE, "rb") as f: + self.dtr_data = f.read() + with open(PATHS_EXAMPLEDATA.MD_POCKET_DESMOND_TOP, "r") as f: + self.desmond_pdb = f.read() + with open(PATHS_EXAMPLEDATA.MDPOCKET_XTC_FILE, "rb") as f: + self.xtc_data = f.read() + with open(PATHS_EXAMPLEDATA.MDPOCKET_PDB_FILE, "r") as f: + self.pdb_file = f.read() + + @classmethod + def tearDownClass(cls): + pass + + def test_MDpocket_desmond(self): + step_conf = { + _SBE.STEPID: "01_cavity_explorer_file_preparation", + _SBE.STEP_TYPE: _SBE.STEP_MDPOCKET, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load fpocket", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: { + _SFP.SELECTION_TEXT: _SFP.PROTEIN, + _SFP.TRAJ_TYPE: "desmond", + } + }, + } + + mdpocket_step = StepMDpocket(**step_conf) + mdpocket_step.data.generic.add_file( + GenericData( + file_name="trj_folder", file_data=self.cav_folder, argument=False + ) + ) + mdpocket_step.data.generic.add_file( + GenericData( + file_name="structure.pdb", file_data=self.desmond_pdb, argument=True + ) + ) + mdpocket_step.data.generic.add_file( + GenericData(file_name="clickme.dtr", file_data=self.dtr_data, argument=True) + ) + 
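+        # all inputs are staged as generic files: the trajectory folder with argument=False, the topology PDB and the .dtr file as arguments (argument=True)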
mdpocket_step.execute() + + out_path = os.path.join(self._test_dir, "pocket_0_descriptors.txt") + mdpocket_step.write_generic_by_extension(self._test_dir, "txt") + + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 187400) + + def test_MDpocket_xtc(self): + step_conf = { + _SBE.STEPID: "01_cavity_explorer_file_preparation", + _SBE.STEP_TYPE: _SBE.STEP_MDPOCKET, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load fpocket", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 4, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 2, # this will be automatically overwritten + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: { + _SFP.SELECTION_TEXT: _SFP.PROTEIN, + _SFP.TRAJ_TYPE: "gromacs", + } + }, + } + + mdpocket_step = StepMDpocket(**step_conf) + + mdpocket_step.data.generic.add_file( + GenericData( + file_name="structure.xtc", file_data=self.xtc_data, argument=True + ) + ) + mdpocket_step.data.generic.add_file( + GenericData( + file_name="structure.pdb", file_data=self.pdb_file, argument=True + ) + ) + mdpocket_step.execute() + + out_path = os.path.join(self._test_dir, "pocket_1_descriptors.txt") + mdpocket_step.write_generic_by_name(self._test_dir, "pocket_1_descriptors.txt") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 700) diff --git a/tests/clustering/__init__.py b/tests/clustering/__init__.py new file mode 100644 index 0000000..969d71d --- /dev/null +++ b/tests/clustering/__init__.py @@ -0,0 +1 @@ +from tests.clustering.test_clustering import * diff --git a/tests/clustering/test_clustering.py b/tests/clustering/test_clustering.py new file mode 100644 index 0000000..4b38264 --- /dev/null +++ b/tests/clustering/test_clustering.py @@ -0,0 +1,91 @@ +import unittest + +from icolos.core.containers.compound import Compound, Enumeration +from icolos.core.workflow_steps.calculation.clustering import StepClustering + +from icolos.utils.enums.step_enums import StepBaseEnum, StepClusteringEnum + +from tests.tests_paths import PATHS_EXAMPLEDATA, get_mol_as_Conformer + +_SBE = StepBaseEnum +_SC = StepClusteringEnum() + + +class Test_Clustering(unittest.TestCase): + @classmethod + def setUpClass(cls): + pass + + def setUp(self): + pass + + @classmethod + def tearDownClass(cls): + pass + + def test_Clustering(self): + step_conf = { + _SBE.STEPID: "01_clustering", + _SBE.STEP_TYPE: _SBE.STEP_CLUSTERING, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _SC.N_CLUSTERS: 3, + _SC.MAX_ITER: 300, + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SC.TOP_N_PER_SOLVENT: 3, + _SC.FEATURES: ["area", "dipole", "HB_acc"], + _SC.FREE_ENERGY_SOLVENT_TAGS: [ + "G_h2o", + "G_meoh", + "G_octanol", + "G_dmso", + "G_cychex", + "G_acn", + "G_thf", + ], + }, + }, + } + + cl_step = StepClustering(**step_conf) + cl_step.get_compounds().append(Compound(compound_number=1)) + cl_step.get_compounds()[0].add_enumeration(Enumeration(), auto_update=True) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + cl_step.data.compounds[0][0].add_conformers(conformers, auto_update=True) + + # 11 conformers are put in, but due to clustering only 10 should come out; note, that if only one solvent was + # selected, only 9 would be outputted (this is because 2 of the clusters have 4 members and TOP_N_PER_SOLVENT + # is set to 3) + self.assertEqual(len(cl_step.get_compounds()[0][0].get_conformers()), 11) + cl_step.execute() + 
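+        # with a single solvent, the two 4-member clusters would each be trimmed to TOP_N_PER_SOLVENT = 3, i.e. 3 + 3 + 3 = 9 conformers; taking the union of the top-3 picks across all solvent tags retains one extra member overall, hence 10 of the 11 survive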
self.assertEqual(len(cl_step.get_compounds()[0][0].get_conformers()), 10) + + # make sure it is the 10th element (index 9) that has been removed + self.assertListEqual( + [ + list( + cl_step.get_compounds()[0][0] + .get_conformers()[i] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ) + for i in range(10) + ], + [ + [0.8838, 0.6808, -0.1373], + [-4.2269, -0.441, 0.2359], + [-4.1693, -0.532, -0.0567], + [-4.2326, -0.4701, 0.3534], + [-4.201, -0.5446, 0.131], + [-4.2198, -0.4705, 0.1656], + [-4.2318, -0.444, 0.2474], + [-4.2316, -0.14, 0.0848], + [-4.1953, -0.1989, -0.1017], + [-4.1528, -0.0208, 0.0932], + ], + ) diff --git a/tests/composite_agents/__init__.py b/tests/composite_agents/__init__.py new file mode 100644 index 0000000..10c612c --- /dev/null +++ b/tests/composite_agents/__init__.py @@ -0,0 +1 @@ +from tests.composite_agents.test_workflow import * diff --git a/tests/composite_agents/test_workflow.py b/tests/composite_agents/test_workflow.py new file mode 100644 index 0000000..37abeb1 --- /dev/null +++ b/tests/composite_agents/test_workflow.py @@ -0,0 +1,451 @@ +import unittest +import os +from rdkit import Chem + +from icolos.core.composite_agents.workflow import WorkFlow + +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.enums.composite_agents_enums import WorkflowEnum +from icolos.utils.enums.program_parameters import OMEGAEnum +from icolos.utils.enums.program_parameters import XTBEnum +from icolos.utils.enums.program_parameters import CrestEnum +from icolos.utils.enums.program_parameters import TurbomoleEnum +from icolos.utils.enums.program_parameters import PantherEnum +from icolos.core.steps_utils import initialize_step_from_dict + +from tests.tests_paths import PATHS_EXAMPLEDATA, MAIN_CONFIG +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_WE = WorkflowEnum() +_OE = OMEGAEnum() +_XE = XTBEnum() +_CE = CrestEnum() +_TE = TurbomoleEnum() +_PE = PantherEnum() + + +class Test_workflow(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/workflow") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + _paracetamol_path = PATHS_EXAMPLEDATA.PARACETAMOL_PATH + mol_supplier = Chem.SDMolSupplier(_paracetamol_path, removeHs=False) + for mol in mol_supplier: + self._molecule = mol + + # TODO: move header variables to MAIN_CONFIG + self._HEADER_EXPORT = { + _WE.ENVIRONMENT_EXPORT: [ + { + _WE.ENVIRONMENT_EXPORT_KEY: "OE_LICENSE", + _WE.ENVIRONMENT_EXPORT_VALUE: "/opt/scp/software/oelicense/1.0/oe_license.seq1", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "XTBHOME", + _WE.ENVIRONMENT_EXPORT_VALUE: "/opt/scp/services/reinvent/Icolos/binaries/xtb-6.3.2", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "XTBPATH", + _WE.ENVIRONMENT_EXPORT_VALUE: "${XTBHOME}/share/xtb", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "PATH", + _WE.ENVIRONMENT_EXPORT_VALUE: "${PATH}:${XTBHOME}/bin", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "PKG_CONFIG_PATH", + _WE.ENVIRONMENT_EXPORT_VALUE: "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig", + }, + ] + } + + @classmethod + def tearDownClass(cls): + pass + + def test_workflow_initialization(self): + conf = { + _WE.HEADER: {_WE.ID: "test_workflow", _WE.ENVIRONMENT: self._HEADER_EXPORT}, + _WE.STEPS: [ + { + _SBE.STEPID: "crest_confgen", + _SBE.STEP_TYPE: _SBE.STEP_CREST, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: None, + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["CREST_BINARY_LOCATION"], + _SBE.EXEC_PARALLELIZATION: 
{_SBE.EXEC_PARALLELIZATION_CORES: 7}, + }, + }, + { + _SBE.STEPID: "omega_confgen", + _SBE.STEP_TYPE: _SBE.STEP_OMEGA, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load omega"}, + }, + ], + } + wflow = WorkFlow(**conf) + wflow.initialize() + self.assertEqual(len(wflow.steps), 2) + wflow.add_step( + initialize_step_from_dict( + { + _SBE.STEPID: "omega_confgen2", + _SBE.STEP_TYPE: _SBE.STEP_OMEGA, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load omega"}, + } + ) + ) + self.assertEqual(len(wflow.steps), 2) + self.assertEqual(len(wflow.get_steps()), 3) + + def test_workflow_with_global_variables(self): + out_path = os.path.join(self._test_dir, "global_variables_out.sdf") + conf = { + _WE.HEADER: { + _WE.ID: "test_workflow", + _WE.DESCRIPTION: "this is a test description", + _WE.ENVIRONMENT: self._HEADER_EXPORT, + _WE.GLOBAL_VARIABLES: { + "root_dir": attach_root_path(""), + }, + }, + _WE.STEPS: [ + { + _SBE.STEPID: "01_initialization", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: PATHS_EXAMPLEDATA.PARACETAMOL_PATH, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SDF, + } + ] + }, + } + ], + } + wflow = WorkFlow(**conf) + wflow.initialize() + wflow.execute() + + def test_workflow_execution(self): + conf = { + _WE.HEADER: { + _WE.ID: "test_workflow", + _WE.DESCRIPTION: "this is a test description", + _WE.ENVIRONMENT: self._HEADER_EXPORT, + }, + _WE.STEPS: [ + { + _SBE.STEPID: "01a_initialization", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: PATHS_EXAMPLEDATA.PARACETAMOL_PATH, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SDF, + } + ] + }, + }, + { + _SBE.STEPID: "01b_initialization", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: PATHS_EXAMPLEDATA.ASPIRIN_PATH, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SDF, + } + ] + }, + }, + { + _SBE.STEPID: "02_omega_confgen", + _SBE.STEP_TYPE: _SBE.STEP_OMEGA, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load omega"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _OE.CLASSIC_MAXCONFS: 10, + _OE.CLASSIC_RMS: 0.0, + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "01a_initialization", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + }, + { + _SBE.INPUT_SOURCE: "01b_initialization", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + }, + ], + _SBE.INPUT_MERGE: { + _SBE.INPUT_MERGE_COMPOUNDS: True, + _SBE.INPUT_MERGE_COMPOUNDS_BY: "id", + _SBE.INPUT_MERGE_ENUMERATIONS: True, + _SBE.INPUT_MERGE_ENUMERATIONS_BY: "id", + }, + }, + }, + { + _SBE.STEPID: "02_conf_gen_crest", + _SBE.STEP_TYPE: _SBE.STEP_CREST, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: None, + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["CREST_BINARY_LOCATION"], + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 7}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-niceprint"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _CE.CREST_OPT: "normal", + _CE.CREST_G: "h2o", + _CE.CREST_RTHR: 0.5, + _CE.CREST_ETHR: 0.25, + _CE.CREST_EWIN: 8.0, + _CE.CREST_PTHR: 0.4, + _CE.CREST_BTHR: 0.02, + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "01a_initialization", + 
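+                            # note: this CREST step consumes only the paracetamol input (01a); the OMEGA step above merged both 01a and 01b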
_SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + } + ] + }, + }, + { + _SBE.STEPID: "01_conf_genXTB", + _SBE.STEP_TYPE: _SBE.STEP_XTB, + _SBE.EXEC: { + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["XTBHOME"], + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 7}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _XE.XTB_OPT: "vtight", + _XE.XTB_GBSA: "h2o", + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "02_omega_confgen", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + }, + { + _SBE.INPUT_SOURCE: "02_conf_gen_crest", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + }, + ], + _SBE.INPUT_MERGE: { + _SBE.INPUT_MERGE_COMPOUNDS: True, + _SBE.INPUT_MERGE_COMPOUNDS_BY: "id", + _SBE.INPUT_MERGE_ENUMERATIONS: True, + _SBE.INPUT_MERGE_ENUMERATIONS_BY: "id", + }, + }, + }, + ], + } + wflow = WorkFlow(**conf) + wflow.initialize() + wflow.execute() + + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join(self._test_dir, "02a_omega_confgen.sdf") + wflow[2].write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreaterEqual(stat_inf.st_size, 4252) + out_path = os.path.join(self._test_dir, "02b_crest_confgen.sdf") + wflow[3].write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreaterEqual(stat_inf.st_size, 47156) + out_path = os.path.join(self._test_dir, "03_XTB_from_omega_and_crest.sdf") + wflow[4].write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreaterEqual(stat_inf.st_size, 52807) + + def test_ePSA_workflow_execution(self): + conf = { + _WE.HEADER: { + _WE.ID: "test_workflow", + _WE.DESCRIPTION: "this is a test description", + _WE.ENVIRONMENT: self._HEADER_EXPORT, + }, + _WE.STEPS: [ + { + _SBE.STEPID: "01_initialization_paracetamol", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: PATHS_EXAMPLEDATA.PARACETAMOL_PATH, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SDF, + } + ] + }, + }, + { + _SBE.STEPID: "01_initialization_aspirin", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: PATHS_EXAMPLEDATA.ASPIRIN_PATH, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SDF, + } + ] + }, + }, + { + _SBE.STEPID: "02_omega_confgen", + _SBE.STEP_TYPE: _SBE.STEP_OMEGA, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load omega"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _OE.CLASSIC_MAXCONFS: 200, + _OE.CLASSIC_RMS: 0.0, + _OE.CLASSIC_CANON_ORDER: "false", + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "01_initialization_paracetamol", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + }, + { + _SBE.INPUT_SOURCE: "01_initialization_aspirin", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + }, + ] + }, + }, + { + _SBE.STEPID: "03_conf_optXTB", + _SBE.STEP_TYPE: _SBE.STEP_XTB, + _SBE.EXEC: { + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["XTBHOME"], + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 7}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _XE.XTB_OPT: "vtight", + _XE.XTB_GBSA: "h2o", + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + 
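+                            # steps are chained by step id rather than by file path: this XTB step consumes the conformers produced by "02_omega_confgen"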
_SBE.INPUT_SOURCE: "02_omega_confgen", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + } + ] + }, + }, + { + _SBE.STEPID: "04_turbomole", + _SBE.STEP_TYPE: _SBE.STEP_TURBOMOLE, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load turbomole/73"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _TE.TM_CONFIG_DIR: "/projects/cc/mai/material/Icolos/turbomole_config", + _TE.TM_CONFIG_BASENAME: "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", + _TE.TM_CONFIG_COSMO: "/projects/cc/mai/material/Icolos/turbomole_config/cosmoprep_eps80.tm", + }, + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "03_conf_optXTB", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + } + ] + }, + }, + { + _SBE.STEPID: "05_cosmo", + _SBE.STEP_TYPE: _SBE.STEP_COSMO, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load COSMOtherm/19.0.4" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "04_turbomole", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STEP, + } + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_SDF, + _SBE.WRITEOUT_DESTINATION_RESOURCE: os.path.join( + self._test_dir, "05_cosmo_ePSA_workflow.sdf" + ), + }, + } + ], + }, + ], + } + wflow = WorkFlow(**conf) + wflow.initialize() + wflow.execute() + + # check SDF write-out (including energy-as-tag annotation) + out_path = os.path.join(self._test_dir, "02_omega_confgen.sdf") + wflow.find_step_by_step_id("02_omega_confgen").write_conformers(out_path) + self.assertGreater(8200, os.stat(out_path).st_size) + out_path = os.path.join(self._test_dir, "03_conf_optXTB.sdf") + wflow.find_step_by_step_id("03_conf_optXTB").write_conformers(out_path) + self.assertGreater(8200, os.stat(out_path).st_size) + out_path = os.path.join(self._test_dir, "04_turbomole.sdf") + wflow.find_step_by_step_id("04_turbomole").write_conformers(out_path) + self.assertGreater(82008, os.stat(out_path).st_size) + out_path = os.path.join(self._test_dir, "05_cosmo_ePSA_workflow.sdf") + self.assertGreater(12500, os.stat(out_path).st_size) diff --git a/tests/containers/__init__.py b/tests/containers/__init__.py new file mode 100644 index 0000000..1d0ae4d --- /dev/null +++ b/tests/containers/__init__.py @@ -0,0 +1,3 @@ +from tests.containers.test_compound import * +from tests.containers.test_generic import * +from tests.containers.test_perturbation_map import * diff --git a/tests/containers/test_compound.py b/tests/containers/test_compound.py new file mode 100644 index 0000000..0de273f --- /dev/null +++ b/tests/containers/test_compound.py @@ -0,0 +1,113 @@ +import unittest +import os +from copy import deepcopy +from rdkit import Chem + +from icolos.core.containers.compound import Conformer, Enumeration, Compound + +from icolos.utils.enums.compound_enums import ( + CompoundContainerEnum, + EnumerationContainerEnum, +) + +from tests.tests_paths import PATHS_EXAMPLEDATA +from icolos.utils.general.files_paths import attach_root_path + + +class Test_Compound(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._CC = CompoundContainerEnum() + cls._EC = 
EnumerationContainerEnum() + + cls._test_dir = attach_root_path("tests/junk/Compound") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + comp = Compound(name="test_molecule", compound_number=0) + enum1 = Enumeration(smile="", molecule=None) + self.e1_conf1 = Conformer(conformer_id=0) + self.e1_conf2 = Conformer(conformer_id=2) + enum1.add_conformer(self.e1_conf1, auto_update=True) + enum1.add_conformer(self.e1_conf2, auto_update=True) + enum2 = Enumeration(smile="", molecule=None) + self.e2_conf1 = Conformer(conformer_id=1) + self.e2_conf2 = Conformer(conformer_id=3) + self.e2_conf3 = Conformer(conformer_id=5) + enum2.add_conformer(self.e2_conf1, auto_update=False) + enum2.add_conformer(self.e2_conf2, auto_update=False) + enum2.add_conformer(self.e2_conf3, auto_update=False) + enum3 = Enumeration(smile="CCC", molecule=None, enumeration_id=4) + self.e3_conf1 = Conformer(conformer_id=0) + enum3.add_conformer(self.e3_conf1, auto_update=True) + comp.add_enumeration(enumeration=enum1, auto_update=True) + comp.add_enumeration(enumeration=enum2, auto_update=True) + comp.add_enumeration(enumeration=enum3, auto_update=False) + self.comp = comp + self.enum1 = enum1 + self.enum2 = enum2 + self.enum3 = enum3 + + _paracetamol_path = attach_root_path(PATHS_EXAMPLEDATA.PARACETAMOL_PATH) + mol_supplier = Chem.SDMolSupplier(_paracetamol_path, removeHs=False) + for mol in mol_supplier: + self._paracetamol_molecule = mol + _aspirin_path = attach_root_path(PATHS_EXAMPLEDATA.ASPIRIN_PATH) + mol_supplier = Chem.SDMolSupplier(_aspirin_path, removeHs=False) + for mol in mol_supplier: + self._aspirin_molecule = mol + + @classmethod + def tearDownClass(cls): + pass + + def test_general_handling(self): + # Enumeration + self.assertEqual(len(self.comp), 3) + l_enums = self.comp.get_enumerations() + self.assertEqual(l_enums[0].get_compound_object(), self.comp) + self.assertEqual(l_enums[1].get_enumeration_id(), 1) + self.assertIsNone(l_enums[2].get_compound_object()) + self.assertEqual(l_enums[2].get_enumeration_id(), 4) + self.assertEqual(self.comp[2].get_enumeration_id(), 4) + + self.assertRaises(IndexError, self.comp.find_enumeration, 3) + self.assertEqual( + self.comp.find_enumeration(enumeration_id=4).get_smile(), "CCC" + ) + + self.assertListEqual([0, 1, 4], self.comp.get_enumeration_ids()) + self.comp.reset_enumeration_ids() + self.assertListEqual([0, 1, 2], self.comp.get_enumeration_ids()) + + # Conformer + self.assertEqual(len(self.comp.find_enumeration(1)), 3) + self.assertEqual(self.comp[1][1].get_conformer_id(), 3) + self.assertListEqual([0, 1], self.comp[0].get_conformer_ids()) + self.assertListEqual([1, 3, 5], self.comp[1].get_conformer_ids()) + + # Deletion + self.comp[1].clear_conformers() + self.assertEqual(len(self.comp[1]), 0) + self.comp.clear_enumerations() + self.assertEqual(len(self.comp), 0) + + def test_cloning_and_resetting(self): + comp_clone = deepcopy(self.comp) + comp_clone[0].set_enumeration_id(10) + self.assertListEqual([0, 1, 4], self.comp.get_enumeration_ids()) + self.assertListEqual([10, 1, 4], comp_clone.get_enumeration_ids()) + + all_conf_ids = [] + for enum in self.comp: + for conf in enum: + all_conf_ids.append(conf.get_conformer_id()) + self.assertListEqual([0, 1, 1, 3, 5, 0], all_conf_ids) + + comp_clone.reset_all_ids() + all_conf_ids = [] + for enum in comp_clone: + for conf in enum: + all_conf_ids.append(conf.get_conformer_id()) + self.assertListEqual([0, 1, 0, 1, 2, 0], all_conf_ids) diff --git a/tests/containers/test_generic.py 
b/tests/containers/test_generic.py new file mode 100644 index 0000000..207a5d6 --- /dev/null +++ b/tests/containers/test_generic.py @@ -0,0 +1,32 @@ +import unittest +import os + +from icolos.core.containers.generic import GenericData, GenericContainer + +from tests.tests_paths import PATHS_EXAMPLEDATA +from icolos.utils.general.files_paths import attach_root_path + + +class Test_Generic(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._GC = GenericContainer() + + cls._test_dir = attach_root_path("tests/junk/Generic") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + gc = GenericContainer() + with open(PATHS_EXAMPLEDATA.FEP_PLUS_DOCKING_PV, "rb") as f: + data = f.read() + gc.add_file( + GenericData(file_name="test_file.txt", file_data=data, argument=True) + ) + self.generic = gc + + def test_GenericHandling(self): + self.assertEqual(len(self.generic.get_flattened_files()), 1) + self.assertEqual( + self.generic.get_file_by_name("test_file.txt").get_extension(), "txt" + ) diff --git a/tests/containers/test_perturbation_map.py b/tests/containers/test_perturbation_map.py new file mode 100644 index 0000000..b15ba60 --- /dev/null +++ b/tests/containers/test_perturbation_map.py @@ -0,0 +1,44 @@ +from icolos.core.containers.perturbation_map import PerturbationMap +import unittest +import os +from icolos.core.containers.generic import GenericData +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + construct_full_compound_object, +) +from icolos.utils.general.files_paths import attach_root_path + + +class Test_PerturbationMap(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + + cls._test_dir = attach_root_path("tests/junk/perturbation_map") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + compounds = construct_full_compound_object( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS, + ) + with open(PATHS_EXAMPLEDATA.FEP_PLUS_PROTEIN, "r") as f: + data = f.read() + protein = GenericData(file_name="protein.pdb", file_data=data) + p_map = PerturbationMap(compounds=compounds, protein=protein) + + p_map.parse_map_file(PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG) + self.p_map = p_map + + def test_perturbation_map(self): + self.assertEqual(len(self.p_map.nodes), 38) + self.assertEqual(len(self.p_map.edges), 62) + self.assertEqual( + self.p_map.nodes[5].get_conformer().get_enumeration_object().get_smile(), + "[H]c1nc(N([H])c2c([H])c(C(=O)N([H])[H])c([H])c(N([H])S(=O)(=O)C([H])([H])[H])c2[H])nc(N([H])c2c(Cl)c([H])c([H])c3c2OC([H])([H])O3)c1[H]", + ) + + def test_vis_map(self): + self.p_map.visualise_perturbation_map(self._test_dir) + filepath = os.path.join(self._test_dir, "vmap.html") + stat_inf = os.stat(filepath) + self.assertGreater(stat_inf.st_size, 13300) diff --git a/tests/cosmo/__init__.py b/tests/cosmo/__init__.py new file mode 100644 index 0000000..9010319 --- /dev/null +++ b/tests/cosmo/__init__.py @@ -0,0 +1 @@ +from tests.cosmo.test_Cosmo import * diff --git a/tests/cosmo/test_Cosmo.py b/tests/cosmo/test_Cosmo.py new file mode 100644 index 0000000..6d0eb22 --- /dev/null +++ b/tests/cosmo/test_Cosmo.py @@ -0,0 +1,174 @@ +import unittest +import os + +from icolos.core.workflow_steps.calculation.cosmo import StepCosmo + +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.enums.program_parameters import TurbomoleEnum + +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + export_unit_test_env_vars, + get_mol_as_Compound, + get_mol_as_Conformer, +) +from 
icolos.utils.enums.compound_enums import ConformerContainerEnum +from icolos.utils.general.files_paths import attach_root_path + + +_SBE = StepBaseEnum +_TE = TurbomoleEnum() +_CTE = ConformerContainerEnum() + + +class Test_Cosmo(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/Cosmo") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + # initialize a Compound with 1 Enumeration and 2 Conformers (done by OMEGA) + _paracetamol_molecule = get_mol_as_Compound(PATHS_EXAMPLEDATA.PARACETAMOL_PATH) + conf = get_mol_as_Conformer(PATHS_EXAMPLEDATA.PARACETAMOL_MULTIPLE_CONF)[0] + with open(PATHS_EXAMPLEDATA.PARACETAMOL_COSMO, "r") as f: + cosmofile = f.readlines() + conf.add_extra_data(key=_CTE.EXTRA_DATA_COSMOFILE, data=cosmofile) + _paracetamol_molecule[0].add_conformer(conf, auto_update=True) + self._paracetamol_molecule = _paracetamol_molecule + + @classmethod + def tearDownClass(cls): + pass + + def test_Cosmo_output_parsing(self): + step_conf = { + _SBE.STEPID: "01_cosmo", + _SBE.STEP_TYPE: _SBE.STEP_COSMO, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load COSMOtherm/20.0.0"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + } + }, + } + cosmo_step = StepCosmo(**step_conf) + cosmo_step.data.compounds = [self._paracetamol_molecule] + cosmo_output_path = PATHS_EXAMPLEDATA.PARACETAMOL_COSMO_OUTPUT + cosmo_step._parse_output( + path_output=cosmo_output_path, conformer=cosmo_step.get_compounds()[0][0][0] + ) + + # test general block + self.assertEqual( + cosmo_step.get_compounds()[0][0][0].get_molecule().GetProp("E_cosmo"), + "-943302.2152", + ) + + # test solvent blocks + self.assertEqual( + cosmo_step.get_compounds()[0][0][0].get_molecule().GetProp("Gsolv_meoh"), + "-24.59517", + ) + self.assertEqual( + cosmo_step.get_compounds()[0][0][0].get_molecule().GetProp("Gsolv_h2o"), + "-23.47666", + ) + self.assertEqual( + cosmo_step.get_compounds()[0][0][0].get_molecule().GetProp("G_propanone"), + "-943303.47354", + ) + try: + self.assertEqual( + cosmo_step.get_compounds()[0][0][0] + .get_molecule() + .GetProp("G_propanonee"), + "", + ) + except KeyError as e: + self.assertEqual("'G_propanonee'", str(e)) + + def test_Cosmo_run(self): + step_conf = { + _SBE.STEPID: "01_cosmo", + _SBE.STEP_TYPE: _SBE.STEP_COSMO, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load COSMOtherm/20.0.0"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _TE.CT_CONFIG: [ + 'ctd = BP_TZVPD_FINE_20.ctd cdir = "/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/CTDATA-FILES" ldir = "/opt/scp/software/COSMOtherm/20.0.0/licensefiles"', + "unit notempty wtln ehfile", + "!! 
generated by COSMOthermX !!", + "f = mol.cosmo", + 'f = "h2o_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/h" VPfile', + 'f = "methanol_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/m" VPfile', + 'f = "1-octanol_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" Comp = "1-octanol" [ VPfile', + 'f = "1-octanol_c1.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1"', + 'f = "1-octanol_c2.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1"', + 'f = "1-octanol_c3.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1"', + 'f = "1-octanol_c4.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1"', + 'f = "1-octanol_c5.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1"', + 'f = "1-octanol_c6.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/1" ]', + 'f = "dimethylsulfoxide_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/d" VPfile', + 'f = "cyclohexane_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/c" VPfile', + 'f = "chcl3_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/c" VPfile', + 'f = "propanone_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/p" VPfile', + 'f = "acetonitrile_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/a" VPfile', + 'f = "thf_c0.cosmo" fdir="/opt/scp/software/COSMOtherm/20.0.0/COSMOtherm/DATABASE-COSMO/BP-TZVPD-FINE/t" VPfile', + "henry= 2 tc=25.0 GSOLV", + "henry= 3 tc=25.0 GSOLV", + "henry= 4 tc=25.0 GSOLV", + "henry= 5 tc=25.0 GSOLV", + "henry= 6 tc=25.0 GSOLV", + "henry= 7 tc=25.0 GSOLV", + "henry= 8 tc=25.0 GSOLV", + "henry= 9 tc=25.0 GSOLV", + "henry= 10 tc=25.0 GSOLV", + ] + }, + } + }, + } + cosmo_step = StepCosmo(**step_conf) + cosmo_step.data.compounds = [self._paracetamol_molecule] + + # conformer coordinates should not be touched by the execution + self.assertListEqual( + list( + cosmo_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8785, 0.6004, -0.2173], + ) + cosmo_step.execute() + self.assertListEqual( + list( + cosmo_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.8785, 0.6004, -0.2173], + ) + self.assertEqual( + cosmo_step.get_compounds()[0][0][0].get_molecule().GetProp("Gsolv_h2o"), + "-23.47666", + ) + cosmofile = cosmo_step.get_compounds()[0][0][0].get_extra_data()[ + _CTE.EXTRA_DATA_COSMOFILE + ] + self.assertTrue("basgrd points= 9806" in cosmofile[5]) + + # check write-out + out_path = os.path.join(self._test_dir, "cosmo_output_files.sdf") + cosmo_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 3079) diff --git a/tests/esp_sim/__init__.py b/tests/esp_sim/__init__.py new file mode 100644 index 0000000..2920888 --- /dev/null +++ b/tests/esp_sim/__init__.py @@ -0,0 +1 @@ +from tests.esp_sim.test_esp_sim import * diff --git a/tests/esp_sim/test_esp_sim.py b/tests/esp_sim/test_esp_sim.py new file mode 100644 index 0000000..9dd1275 --- /dev/null +++ b/tests/esp_sim/test_esp_sim.py @@ -0,0 +1,85 @@ +import unittest +from 
icolos.core.workflow_steps.calculation.electrostatics.esp_sim import StepEspSim + +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import export_unit_test_env_vars + +_SBE = StepBaseEnum() + + +class Test_EspSim(unittest.TestCase): + @classmethod + def setUpClass(cls): + export_unit_test_env_vars() + + def setUp(self): + pass + + @classmethod + def tearDownClass(cls): + pass + + def test_initialize_compound_SDF(self): + step_conf = { + _SBE.STEPID: "01_esp_sim", + _SBE.STEP_TYPE: _SBE.STEP_ESP_SIM, + _SBE.EXEC: { + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 3}, + }, + _SBE.SETTINGS: {_SBE.SETTINGS_ADDITIONAL: {"ref_smiles": "C(C(C(=O)O)O)O"}}, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "C1=CC=C(C=C1)C(C(=O)O)O", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STRING, + } + ] + }, + } + step_esp_sim = StepEspSim(**step_conf) + step_esp_sim.generate_input() + step_esp_sim.execute() + + esp_sim_score = [ + 0.8112564566774974, + 0.7940316946620978, + 0.8157010968264732, + 0.6927039160490105, + 0.6709748529493742, + 0.3780220716995563, + 0.7933792682013576, + 0.7672803082385128, + ] + + shape_sim_score = [ + 0.6419844502036283, + 0.9525606469002695, + 0.5686465433300876, + 0.5986955029179539, + 0.5460218408736349, + 0.5232662864004803, + 0.8305164319248827, + 0.7283643892339544, + ] + + for i in range(len(esp_sim_score)): + self.assertEqual( + step_esp_sim.data.compounds[i] + .get_enumerations()[0] + .get_conformers()[0] + .get_molecule() + .GetProp("esp_sim"), + str(esp_sim_score[i]), + ) + self.assertEqual( + step_esp_sim.data.compounds[i] + .get_enumerations()[0] + .get_conformers()[0] + .get_molecule() + .GetProp("shape_sim"), + str(shape_sim_score[i]), + ) diff --git a/tests/feature_counter/__init__.py b/tests/feature_counter/__init__.py new file mode 100644 index 0000000..f3e9aec --- /dev/null +++ b/tests/feature_counter/__init__.py @@ -0,0 +1 @@ +from tests.feature_counter.test_feature_counter import Test_FeatureCounter diff --git a/tests/feature_counter/test_feature_counter.py b/tests/feature_counter/test_feature_counter.py new file mode 100644 index 0000000..04c501c --- /dev/null +++ b/tests/feature_counter/test_feature_counter.py @@ -0,0 +1,102 @@ +import unittest + +from icolos.core.containers.compound import Compound, Enumeration +from icolos.core.workflow_steps.calculation.feature_counter import StepFeatureCounter +from icolos.utils.enums.program_parameters import FeatureCounterEnum + +from icolos.utils.enums.step_enums import StepBaseEnum, StepFeatureCounterEnum + +from tests.tests_paths import PATHS_EXAMPLEDATA, get_mol_as_Conformer + +_SBE = StepBaseEnum +_FC = FeatureCounterEnum() +_SFC = StepFeatureCounterEnum() + + +class Test_FeatureCounter(unittest.TestCase): + @classmethod + def setUpClass(cls): + pass + + def setUp(self): + comp0 = Compound(compound_number=0) + comp1 = Compound(compound_number=1) + comp0.add_enumeration(Enumeration(), auto_update=True) + comp1.add_enumeration(Enumeration(), auto_update=True) + comp0[0].add_conformers( + get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS), auto_update=True + ) + comp1[0].add_conformers( + get_mol_as_Conformer(PATHS_EXAMPLEDATA.MEDIUM_MOLECULES_SDF_PATH), + auto_update=True, + ) + self.comp0 = comp0 + self.comp1 = comp1 + + @classmethod + def tearDownClass(cls): + pass + + def test_ring_counting(self): + step_conf = { + 
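+            # minimal configuration: no EXEC block is needed and the feature to count is selected via SETTINGS_ADDITIONAL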
_SBE.STEPID: "01_feature_counting", + _SBE.STEP_TYPE: _SBE.STEP_FEATURE_COUNTER, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {_SFC.FEATURE: _FC.PROPERTY_NUM_RINGS}, + }, + } + + fc_step = StepFeatureCounter(**step_conf) + fc_step.data.compounds = [self.comp0, self.comp1] + + fc_step.execute() + + self.assertEqual( + fc_step.get_compounds()[0][0][0] + .get_molecule() + .GetProp(_FC.PROPERTY_NUM_RINGS), + "2", + ) + self.assertEqual( + fc_step.get_compounds()[1][0][1] + .get_molecule() + .GetProp(_FC.PROPERTY_NUM_RINGS), + "2", + ) + + def test_aromatic_ring_counting(self): + step_conf = { + _SBE.STEPID: "01_feature_counting", + _SBE.STEP_TYPE: _SBE.STEP_FEATURE_COUNTER, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SFC.FEATURE: _FC.PROPERTY_NUM_AROMATIC_RINGS + }, + }, + } + + fc_step = StepFeatureCounter(**step_conf) + fc_step.data.compounds = [self.comp0, self.comp1] + + fc_step.execute() + + self.assertEqual( + fc_step.get_compounds()[0][0][0] + .get_molecule() + .GetProp(_FC.PROPERTY_NUM_AROMATIC_RINGS), + "2", + ) + self.assertEqual( + fc_step.get_compounds()[1][0][1] + .get_molecule() + .GetProp(_FC.PROPERTY_NUM_AROMATIC_RINGS), + "1", + ) diff --git a/tests/flow_control/__init__.py b/tests/flow_control/__init__.py new file mode 100644 index 0000000..4d83148 --- /dev/null +++ b/tests/flow_control/__init__.py @@ -0,0 +1 @@ +from tests.flow_control.test_iterator import * diff --git a/tests/flow_control/test_iterator.py b/tests/flow_control/test_iterator.py new file mode 100644 index 0000000..89807b6 --- /dev/null +++ b/tests/flow_control/test_iterator.py @@ -0,0 +1,197 @@ +import unittest +from icolos.core.flow_control.iterator import StepIterator +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.step_enums import StepBaseEnum, StepTurbomoleEnum +from icolos.utils.general.files_paths import attach_root_path +from icolos.utils.enums.program_parameters import TurbomoleEnum +import os +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + MAIN_CONFIG, +) + +_SBE = StepBaseEnum +_TE = TurbomoleEnum() +_STE = StepTurbomoleEnum() + + +class TestIterator(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/iterator") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self) -> None: + with open(PATHS_EXAMPLEDATA.GROMACS_HOLO_STRUCTURE_GRO, "r") as f: + self.structure = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_TOP, "r") as f: + self.topol = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_TPR, "rb") as f: + self.tpr_file = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_XTC, "rb") as f: + self.xtc_file = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_POSRE, "r") as f: + self.posre = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_LIG_ITP, "r") as f: + self.lig_itp = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_LIG_POSRE, "r") as f: + self.lig_posre = f.read() + + def test_single_initialization(self): + + full_conf = { + "base_config": [ + { + _SBE.STEPID: "01_turbomole", + _SBE.STEP_TYPE: _SBE.STEP_TURBOMOLE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load turbomole/73", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + 
_SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _TE.TM_CONFIG_DIR: MAIN_CONFIG["TURBOMOLE_CONFIG"], + _TE.TM_CONFIG_COSMO: os.path.join( + MAIN_CONFIG["TURBOMOLE_CONFIG"], "cosmoprep_eps80.tm" + ), + _STE.EXECUTION_MODE: _TE.TM_RIDFT, + }, + }, + } + ], + "iter_settings": { + "settings": { + "01_turbomole": { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + _SBE.SETTINGS_ADDITIONAL: { + _TE.TM_CONFIG_BASENAME: [ + "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", + "blyp-ri-d3-def2-svp-int-coarse-charge2", + "some_other_spicy_functional", + ] + }, + } + }, + "iter_mode": "single", + "n_iters": 3, # for now this is manual, should match the number of settings to iterate over + }, + } + + step_iterator = StepIterator(**full_conf) + self.assertEqual(len(step_iterator.initialized_steps), 3) + for i in step_iterator.initialized_steps: + assert isinstance(i, StepBase) + + def test_multi_iter_initialization(self): + + full_conf = { + "base_config": [ + { + _SBE.STEPID: "01_turbomole", + _SBE.STEP_TYPE: _SBE.STEP_TURBOMOLE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load turbomole/73", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _TE.TM_CONFIG_DIR: MAIN_CONFIG["TURBOMOLE_CONFIG"], + _TE.TM_CONFIG_COSMO: os.path.join( + MAIN_CONFIG["TURBOMOLE_CONFIG"], "cosmoprep_eps80.tm" + ), + _TE.TM_CONFIG_BASENAME: "b97-3c-ri-d3-def2-mtzvp-int-nosym-charge", + _STE.EXECUTION_MODE: _TE.TM_RIDFT, + }, + }, + } + ], + "iter_settings": { + # no changes in config, just run the same step through multiple times + "iter_mode": "n_iters", + "n_iters": 5, # for now this is manual, should match the number of settings to iterate over + }, + } + + step_iterator = StepIterator(**full_conf) + self.assertEqual(len(step_iterator.initialized_steps), 5) + for i in step_iterator.initialized_steps: + assert isinstance(i, StepBase) + + def test_single_initialization_parallel_execution(self): + """ + Test running multiple steps in parallel + """ + + full_conf = { + "base_config": [ + { + _SBE.STEPID: "test_mmgbsa", + _SBE.STEP_TYPE: _SBE.STEP_GMX_MMPBSA, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2 && module load gmx_MMPBSA/1.3.3-fosscuda-2019a-Python-3.7.2" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "make_ndx_command": "Protein Other", + "pipe_input": "Protein Other", + }, + }, + } + ], + "iter_settings": { + "n_iters": 4, # for now this is manual, should match the number of settings to iterate over + "parallelizer_settings": { + "parallelize": True, + "cores": 4, + "max_lenth_sublists": 1, + }, + }, + } + + step_mmpbsa_job_control = StepIterator(**full_conf) + # step_mmpbsa.data.generic.add_file( + # GenericData(file_name="structure.gro", file_data=self.structure) + # ) + # step_mmpbsa.data.generic.add_file( + # GenericData(file_name="topol.top", file_data=self.topol) + # ) + # step_mmpbsa.data.generic.add_file( + # GenericData(file_name="structure.xtc", file_data=self.xtc_file) + # ) + # step_mmpbsa.data.generic.add_file( + # GenericData(file_name="structure.tpr", file_data=self.tpr_file) + # ) + # step_mmpbsa.data.generic.add_file( + # GenericData(file_name="posre.itp", file_data=self.posre) 
+ # ) + # step_mmpbsa.data.generic.add_file( + # GenericData(file_name="DMP:100.itp", file_data=self.lig_itp) + # ) + # step_mmpbsa.data.generic.add_file( + # GenericData(file_name="posre_DMP:100.itp", file_data=self.lig_posre) + # ) + + # should return JobControl object + assert isinstance(step_mmpbsa_job_control.initialized_steps, StepBase) + # TODO: there isn't really a good way to unit test this, it is a pain to load the data in to the individual steps + # step_mmpbsa_job_control.initialized_steps.execute() diff --git a/tests/gromacs/__init__.py b/tests/gromacs/__init__.py new file mode 100644 index 0000000..566d8ae --- /dev/null +++ b/tests/gromacs/__init__.py @@ -0,0 +1,14 @@ +from tests.gromacs.test_editconf import * +from tests.gromacs.test_genion import * +from tests.gromacs.test_solvate import * +from tests.gromacs.test_mdrun import * +from tests.gromacs.test_pdb2gmx import * +from tests.gromacs.test_grompp import * +from tests.gromacs.test_trjconv import * +from tests.gromacs.test_cluster import * +from tests.gromacs.test_mmpbsa import * +from tests.gromacs.test_cluster_ts import * + +# from tests.gromacs.test_do_dssp import * +from tests.gromacs.test_trjcat import * +from tests.gromacs.test_rmsd import * diff --git a/tests/gromacs/test_cluster.py b/tests/gromacs/test_cluster.py new file mode 100644 index 0000000..26f4a81 --- /dev/null +++ b/tests/gromacs/test_cluster.py @@ -0,0 +1,70 @@ +from icolos.core.containers.generic import GenericData +from icolos.core.workflow_steps.gromacs.cluster import StepGMXCluster +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + +SGE = StepGromacsEnum() +SBE = StepBaseEnum + + +class Test_Cluster(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_XTC), "rb") as f: + self.xtc = f.read() + + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_TPR_TRJCONV), "rb") as f: + self.tpr = f.read() + + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_STRUCTURE_FILE), "r") as f: + self.structure = f.read() + + def test_cluster(self): + step_conf = { + SBE.STEPID: "test_cluster", + SBE.STEP_TYPE: "cluster", + SBE.EXEC: { + SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + SBE.SETTINGS: { + SBE.SETTINGS_ARGUMENTS: { + SBE.SETTINGS_ARGUMENTS_FLAGS: [], + SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-dt": "1000", + "-n": "index.ndx", + }, + }, + SBE.SETTINGS_ADDITIONAL: { + SBE.PIPE_INPUT: "2 System", + SGE.MAKE_NDX_COMMAND: "1 & a P", + }, + }, + } + + step_cluster = StepGMXCluster(**step_conf) + step_cluster.data.generic.add_file( + GenericData(file_name="tmp10249.xtc", file_data=self.xtc, argument=True) + ) + step_cluster.data.generic.add_file( + GenericData(file_name="tmp03942.tpr", file_data=self.tpr, argument=True) + ) + step_cluster.data.generic.add_file( + GenericData( + file_name="structure.gro", file_data=self.structure, argument=True + ) + ) + step_cluster.execute() + out_path = os.path.join(self._test_dir, "clusters.pdb") + step_cluster.write_generic_by_extension(self._test_dir, "pdb") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 2002553) diff --git 
a/tests/gromacs/test_cluster_ts.py b/tests/gromacs/test_cluster_ts.py new file mode 100644 index 0000000..7396ae7 --- /dev/null +++ b/tests/gromacs/test_cluster_ts.py @@ -0,0 +1,55 @@ +from icolos.core.workflow_steps.gromacs.clusters_ts import StepClusterTS +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class Test_ts_cluster(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_TS_CLUSTERS), "r") as f: + self.data = f.read() + + def test_ts_cluster(self): + step_conf = { + _SBE.STEPID: "test_ts_cluster", + _SBE.STEP_TYPE: "ts_cluster", + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load R"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "lengths": "10001", + "clustersNumber": "13", + "mdEngine": "GROMACS", + }, + } + }, + } + + step_ts_cluster = StepClusterTS(**step_conf) + step_ts_cluster.data.generic.add_file( + GenericData( + file_name="clusters_ts_example.xvg", file_data=self.data, argument=True + ) + ) + + step_ts_cluster.execute() + + out_path = os.path.join(self._test_dir, "clusters_ts.png") + step_ts_cluster.write_generic_by_name(self._test_dir, "clusters_ts.png") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 36102) diff --git a/tests/gromacs/test_do_dssp.py b/tests/gromacs/test_do_dssp.py new file mode 100644 index 0000000..fb05dce --- /dev/null +++ b/tests/gromacs/test_do_dssp.py @@ -0,0 +1,54 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.core.workflow_steps.gromacs.do_dssp import StepGMXDoDSSP +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class Test_Editconf(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_DSSP_TPR), "rb") as f: + self.structure = f.read() + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_DSSP_XTC), "rb") as f: + self.traj = f.read() + + def test_editconf_run(self): + step_conf = { + _SBE.STEPID: "test_dssp", + _SBE.STEP_TYPE: "dssp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + } + }, + } + + step_do_dssp = StepGMXDoDSSP(**step_conf) + step_do_dssp.data.generic.add_file( + GenericData(file_name="structure.tpr", file_data=self.structure) + ) + step_do_dssp.data.generic.add_file( + GenericData(file_name="traj.xtc", file_data=self.traj) + ) + step_do_dssp.execute() + out_path = os.path.join(self._test_dir, "info.dat") + 
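+        # write the step's generic output into the junk dir, then assert on its size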
step_do_dssp.write_generic_by_name(self._test_dir, "info.dat") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 22377) diff --git a/tests/gromacs/test_editconf.py b/tests/gromacs/test_editconf.py new file mode 100644 index 0000000..ddb7fe8 --- /dev/null +++ b/tests/gromacs/test_editconf.py @@ -0,0 +1,54 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.core.workflow_steps.gromacs.editconf import StepGMXEditConf +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class Test_Editconf(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_STRUCTURE_FILE), "r") as f: + self.structure = f.read() + + def test_editconf_run(self): + step_conf = { + _SBE.STEPID: "test_editconf", + _SBE.STEP_TYPE: "editconf", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-d": "1.0", + "-bt": "dodecahedron", + }, + } + }, + } + + step_editconf = StepGMXEditConf(**step_conf) + step_editconf.data.generic.add_file( + GenericData( + file_name="structure.gro", file_data=self.structure, argument=True + ) + ) + step_editconf.execute() + out_path = os.path.join(self._test_dir, "structure.gro") + step_editconf.write_generic_by_name(self._test_dir, "structure.gro") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 22377) diff --git a/tests/gromacs/test_genion.py b/tests/gromacs/test_genion.py new file mode 100644 index 0000000..af1ae30 --- /dev/null +++ b/tests/gromacs/test_genion.py @@ -0,0 +1,59 @@ +from icolos.core.containers.generic import GenericData +from icolos.core.workflow_steps.gromacs.genion import StepGMXGenion +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class Test_Genion(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_TOPOL_FILE), "r") as f: + self.topol = f.read() + with open(attach_root_path(PATHS_EXAMPLEDATA.GROMACS_TPR_FILE), "rb") as f: + self.tpr = f.read() + + def test_genion_run(self): + step_conf = { + _SBE.STEPID: "test_genion", + _SBE.STEP_TYPE: "genion", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-neutral"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-pname": "NA", + "-nname": "CL", + }, + }, + _SBE.SETTINGS_ADDITIONAL: {_SBE.PIPE_INPUT: "3"}, + }, + } + + step_genion = StepGMXGenion(**step_conf) + step_genion.data.generic.add_file( + GenericData(file_name="topol.top", file_data=self.topol, 
argument=True) + ) + step_genion.data.generic.add_file( + GenericData(file_name="structure.tpr", file_data=self.tpr, argument=True) + ) + step_genion.execute() + + out_path = os.path.join(self._test_dir, "structure.gro") + step_genion.write_generic_by_name(self._test_dir, "structure.gro") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 874941) diff --git a/tests/gromacs/test_grompp.py b/tests/gromacs/test_grompp.py new file mode 100644 index 0000000..c536750 --- /dev/null +++ b/tests/gromacs/test_grompp.py @@ -0,0 +1,72 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.core.workflow_steps.gromacs.grompp import StepGMXGrompp +from icolos.utils.general.files_paths import attach_root_path + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class Test_Grompp(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(PATHS_EXAMPLEDATA.GROMACS_GROMPP_INPUT_STRUCTURE, "r") as f: + self.structure = f.read() + with open(PATHS_EXAMPLEDATA.GROMACS_IONS_MDP, "r") as f: + self.mdp = f.read() + with open(PATHS_EXAMPLEDATA.GROMACS_GROMPP_TOPOL, "r") as f: + self.topol = f.read() + + def test_grompp(self): + step_conf = { + _SBE.STEPID: "test_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.FIELDS: { + "nsteps": 50, + "-nsteeps": 123, + }, # deliberate typo to check warning + _SGE.FORCEFIELD: "/projects/cc/mai/material/Icolos/forcefields/charmm36-feb2021.ff", + "-r": False, + _SGE.MAKE_NDX_COMMAND: "auto", + }, + }, + } + + step_grompp = StepGMXGrompp(**step_conf) + step_grompp.data.generic.add_file( + GenericData( + file_name="tmp029389.gro", file_data=self.structure, argument=True + ) + ) + step_grompp.data.generic.add_file( + GenericData(file_name="tmp03394.mdp", file_data=self.mdp, argument=True) + ) + step_grompp.data.generic.add_file( + GenericData(file_name="tmp91023.top", file_data=self.topol, argument=True) + ) + + step_grompp.execute() + + out_path = os.path.join(self._test_dir, "structure.tpr") + step_grompp.write_generic_by_name(self._test_dir, "structure.tpr") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 596160) diff --git a/tests/gromacs/test_mdrun.py b/tests/gromacs/test_mdrun.py new file mode 100644 index 0000000..bc8abdd --- /dev/null +++ b/tests/gromacs/test_mdrun.py @@ -0,0 +1,70 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.workflow_steps.gromacs.mdrun import StepGMXMDrun + + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class Test_MDrun(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + 
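+        # export the environment variables the unit tests rely on (see tests_paths)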
export_unit_test_env_vars() + + def setUp(self): + with open(PATHS_EXAMPLEDATA.GROMACS_TPR_FILE, "rb") as f: + self.tpr = f.read() + + def test_mdrun(self): + step_conf = { + _SBE.STEPID: "test_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + } + + step_mdrun = StepGMXMDrun(**step_conf) + step_mdrun.data.generic.add_file( + GenericData(file_name="structure.tpr", file_data=self.tpr, argument=True) + ) + step_mdrun.execute() + + out_path = os.path.join(self._test_dir, "structure.gro") + step_mdrun.write_generic_by_extension(self._test_dir, "gro") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 874941) + + def test_mdrun_slurm(self): + step_conf = { + _SBE.STEPID: "test_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_RESOURCE: "slurm", + _SBE.EXEC_JOB_CONTROL: { + _SBE.EXEC_JOB_CONTROL_PARTITION: "gpu", + _SBE.EXEC_JOB_CONTROL_GRES: "gpu:1", + _SBE.EXEC_JOB_CONTROL_MODULES: ["GROMACS/2020.3-fosscuda-2019a"], + }, + }, + } + + step_mdrun = StepGMXMDrun(**step_conf) + step_mdrun.data.generic.add_file( + GenericData(file_name="structure.tpr", file_data=self.tpr, argument=True) + ) + step_mdrun.execute() + + out_path = os.path.join(self._test_dir, "structure.gro") + step_mdrun.write_generic_by_extension(self._test_dir, "gro") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 874941) diff --git a/tests/gromacs/test_mmpbsa.py b/tests/gromacs/test_mmpbsa.py new file mode 100644 index 0000000..a28d02d --- /dev/null +++ b/tests/gromacs/test_mmpbsa.py @@ -0,0 +1,142 @@ +from icolos.core.workflow_steps.gromacs.mmpbsa import StepGMXmmpbsa +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars, MAIN_CONFIG +from icolos.utils.general.files_paths import attach_root_path +from time import time + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class Test_MMPBSA(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self) -> None: + with open(PATHS_EXAMPLEDATA.GROMACS_HOLO_STRUCTURE_GRO, "r") as f: + self.structure = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_TOP, "r") as f: + self.topol = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_TPR, "rb") as f: + self.tpr_file = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_XTC, "rb") as f: + self.xtc_file = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_POSRE, "rb") as f: + self.posre = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_LIG_ITP, "rb") as f: + self.lig_itp = f.read() + + with open(PATHS_EXAMPLEDATA.MMPBSA_LIG_POSRE, "rb") as f: + self.lig_posre = f.read() + + def test_protein_lig_single_traj(self): + step_conf = { + _SBE.STEPID: "test_gmmpbsa", + _SBE.STEP_TYPE: "gmx_mmpbsa", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a && module load gmx_MMPBSA && module load AmberTools/21-fosscuda-2019a-Python-3.7.2" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.FORCEFIELD: MAIN_CONFIG["FORCEFIELD"], + _SGE.COUPLING_GROUPS: "Protein Other", + }, + }, + } + step_mmpbsa = StepGMXmmpbsa(**step_conf) + 
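+        # attach the structure, topology, trajectory and restraint/ligand itp
+        # files that the gmx_MMPBSA run expects as generic inputs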
step_mmpbsa.data.generic.add_file( + GenericData(file_name="structure.gro", file_data=self.structure) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="topol.top", file_data=self.topol) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="structure.xtc", file_data=self.xtc_file) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="structure.tpr", file_data=self.tpr_file) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="posre.itp", file_data=self.posre) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="DMP:100.itp", file_data=self.lig_itp) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="posre_DMP:100.itp", file_data=self.lig_posre) + ) + step_mmpbsa.execute() + out_path = os.path.join(self._test_dir, "FINAL_RESULTS_MMPBSA.dat") + step_mmpbsa.write_generic_by_extension(self._test_dir, "dat") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 5570) + + def test_protein_lig_single_traj_custom_file(self): + + step_conf = { + _SBE.STEPID: "test_gmmpbsa", + _SBE.STEP_TYPE: "gmx_mmpbsa", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a && module load gmx_MMPBSA && module load AmberTools/21-fosscuda-2019a-Python-3.7.2" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.FORCEFIELD: MAIN_CONFIG["FORCEFIELD"], + _SGE.COUPLING_GROUPS: "Protein Other", + _SGE.INPUT_FILE: PATHS_EXAMPLEDATA.MMPBSA_CUSTOM_INPUT, + "ntasks": 2, + }, + }, + } + step_mmpbsa = StepGMXmmpbsa(**step_conf) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="structure.gro", file_data=self.structure) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="topol.top", file_data=self.topol) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="structure.xtc", file_data=self.xtc_file) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="structure.tpr", file_data=self.tpr_file) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="posre.itp", file_data=self.posre) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="DMP:100.itp", file_data=self.lig_itp) + ) + step_mmpbsa.data.generic.add_file( + GenericData(file_name="posre_DMP:100.itp", file_data=self.lig_posre) + ) + t1 = time() + step_mmpbsa.execute() + exec_time = time() - t1 + print("single traj exec time, custom input", exec_time) + out_path = os.path.join(self._test_dir, "FINAL_RESULTS_MMPBSA.dat") + step_mmpbsa.write_generic_by_extension(self._test_dir, "dat") + stat_inf = os.stat(out_path) + + self.assertGreater(stat_inf.st_size, 4680) diff --git a/tests/gromacs/test_pdb2gmx.py b/tests/gromacs/test_pdb2gmx.py new file mode 100644 index 0000000..641abe8 --- /dev/null +++ b/tests/gromacs/test_pdb2gmx.py @@ -0,0 +1,91 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.workflow_steps.gromacs.pdb2gmx import StepGMXPdb2gmx + +_SGE = StepGromacsEnum() +_SBE = StepBaseEnum + + +class Test_Pdb2gmx(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + 
export_unit_test_env_vars()
+
+    def setUp(self):
+        with open(PATHS_EXAMPLEDATA.GROMACS_PDB_FILE, "r") as f:
+            self.structure = f.read()
+        with open(PATHS_EXAMPLEDATA.GROMACS_HOLO_STRUCTURE, "r") as f:
+            self.holo_structure = f.read()
+
+    def test_pdb2gmx_run(self):
+        step_conf = {
+            _SBE.STEPID: "test_pdb2gmx",
+            _SBE.STEP_TYPE: "pdb2gmx",
+            _SBE.EXEC: {
+                _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a"
+            },
+            _SBE.SETTINGS: {
+                _SBE.SETTINGS_ARGUMENTS: {
+                    _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-ignh"],
+                    _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {
+                        "-water": "tip4p",
+                        "-ff": "amber03",
+                    },
+                }
+            },
+        }
+
+        step_pdb2gmx = StepGMXPdb2gmx(**step_conf)
+        step_pdb2gmx.data.generic.add_file(
+            GenericData(
+                file_name="structure.pdb", file_data=self.structure, argument=True
+            )
+        )
+        step_pdb2gmx.execute()
+        out_path = os.path.join(self._test_dir, "structure.gro")
+        step_pdb2gmx.write_generic_by_extension(
+            self._test_dir, _SGE.FIELD_KEY_STRUCTURE
+        )
+        stat_inf = os.stat(out_path)
+        self.assertGreater(stat_inf.st_size, 22300)
+
+    def test_lig_param(self):
+        step_conf = {
+            _SBE.STEPID: "test_pdb2gmx_lig_param",
+            _SBE.STEP_TYPE: "pdb2gmx_lig",
+            _SBE.EXEC: {
+                _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a"
+            },
+            _SBE.SETTINGS: {
+                _SBE.SETTINGS_ADDITIONAL: {_SBE.PIPE_INPUT: "echo 3"},
+                _SBE.SETTINGS_ARGUMENTS: {
+                    _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-ignh"],
+                    _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {
+                        "-water": "tip4p",
+                        "-ff": "amber03",
+                    },
+                },
+            },
+        }
+
+        step_lig_param = StepGMXPdb2gmx(**step_conf)
+        step_lig_param.data.generic.add_file(
+            GenericData(
+                file_name="tmp_whatever01923.pdb", file_data=self.holo_structure
+            )
+        )
+        step_lig_param.execute()
+        out_path = os.path.join(self._test_dir, "structure.gro")
+        step_lig_param.write_generic_by_extension(
+            self._test_dir, _SGE.FIELD_KEY_STRUCTURE
+        )
+        stat_inf = os.stat(out_path)
+        self.assertGreater(stat_inf.st_size, 73800)
diff --git a/tests/gromacs/test_removelig.py b/tests/gromacs/test_removelig.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/tests/gromacs/test_removelig.py
@@ -0,0 +1 @@
+
diff --git a/tests/gromacs/test_rmsd.py b/tests/gromacs/test_rmsd.py
new file mode 100644
index 0000000..15d9148
--- /dev/null
+++ b/tests/gromacs/test_rmsd.py
@@ -0,0 +1,55 @@
+from icolos.core.containers.generic import GenericData
+import unittest
+import os
+from icolos.core.workflow_steps.gromacs.rsmd import StepGMXrmsd
+from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum
+from tests.tests_paths import (
+    PATHS_EXAMPLEDATA,
+    export_unit_test_env_vars,
+    get_docked_ligands_as_conformers,
+)
+from icolos.utils.general.files_paths import attach_root_path
+
+SGE = StepGromacsEnum()
+SBE = StepBaseEnum
+
+
+class Test_GMXRmsd(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls._test_dir = attach_root_path("tests/junk/gromacs")
+        if not os.path.isdir(cls._test_dir):
+            os.makedirs(cls._test_dir)
+
+        export_unit_test_env_vars()
+
+    def setUp(self):
+        with open(PATHS_EXAMPLEDATA.GROMACS_DMP_LIGAND_TRJ, "rb") as f:
+            self.xtc = f.read()
+
+        # load the docked pose as a compound
+        self.comp = get_docked_ligands_as_conformers(
+            PATHS_EXAMPLEDATA.GROMACS_DMP_LIGAND_SDF
+        )
+
+    def test_gmx_rmsd(self):
+        step_conf = {
+            SBE.STEPID: "test_gmx_rmsd",
+            SBE.STEP_TYPE: "gmx_rmsd",
+            SBE.EXEC: {
+                SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a"
+            },
+            SBE.SETTINGS: {},
+        }
+
+        step_rmsd = StepGMXrmsd(**step_conf)
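+        # the trajectory is attached as a generic file; the docked pose loaded
+        # in setUp is attached as a compound below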
step_rmsd.data.generic.add_file( + GenericData(file_name="structure.xtc", file_data=self.xtc, argument=True) + ) + step_rmsd.data.compounds = self.comp + + step_rmsd.execute() + out_path = os.path.join(self._test_dir, "rmsd.xvg") + step_rmsd.write_generic_by_extension(self._test_dir, "xvg") + stat_inf = os.stat(out_path) + self.assertAlmostEqual(stat_inf.st_size, 3220, delta=100) diff --git a/tests/gromacs/test_solvate.py b/tests/gromacs/test_solvate.py new file mode 100644 index 0000000..1dd1e7a --- /dev/null +++ b/tests/gromacs/test_solvate.py @@ -0,0 +1,60 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.workflow_steps.gromacs.solvate import StepGMXSolvate + +_SBE = StepBaseEnum +_SGE = StepGromacsEnum() + + +class Test_Solvate(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(PATHS_EXAMPLEDATA.GROMACS_TOPOL_FILE, "r") as f: + self.topol = f.read() + with open(PATHS_EXAMPLEDATA.GROMACS_STRUCTURE_FILE, "r") as f: + self.structure = f.read() + + def test_solvate(self): + step_conf = { + _SBE.STEPID: "test_solvate", + _SBE.STEP_TYPE: "solvate", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + } + }, + } + + step_solvate = StepGMXSolvate(**step_conf) + step_solvate.data.generic.add_file( + GenericData( + file_name="structure.gro", file_data=self.structure, argument=True + ) + ) + step_solvate.data.generic.add_file( + GenericData(file_name="topol.top", file_data=self.topol, argument=True) + ) + + step_solvate.execute() + + out_path = os.path.join(self._test_dir, "structure.gro") + step_solvate.write_generic_by_extension( + self._test_dir, _SGE.FIELD_KEY_STRUCTURE + ) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 650000) diff --git a/tests/gromacs/test_trjcat.py b/tests/gromacs/test_trjcat.py new file mode 100644 index 0000000..ebc3709 --- /dev/null +++ b/tests/gromacs/test_trjcat.py @@ -0,0 +1,56 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.core.workflow_steps.gromacs.trajcat import StepGMXTrjcat +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + +SGE = StepGromacsEnum() +SBE = StepBaseEnum + + +class Test_Trjcat(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(PATHS_EXAMPLEDATA.GROMACS_XTC, "rb") as f: + self.xtc = f.read() + + with open(PATHS_EXAMPLEDATA.GROMACS_TPR_TRJCONV, "rb") as f: + self.tpr = f.read() + + def test_trjconv(self): + step_conf = { + SBE.STEPID: "test_trjcat", + SBE.STEP_TYPE: "trjcat", + SBE.EXEC: { + SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + SBE.SETTINGS: {}, + } + + 
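+        # three copies of the same trajectory are concatenated into a single xtc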
step_trjconv = StepGMXTrjcat(**step_conf) + step_trjconv.data.generic.add_file( + GenericData(file_name="structure.xtc", file_data=self.xtc, argument=True) + ) + step_trjconv.data.generic.add_file( + GenericData(file_name="structure_2.xtc", file_data=self.xtc, argument=True) + ) + step_trjconv.data.generic.add_file( + GenericData(file_name="structure_3.xtc", file_data=self.xtc, argument=True) + ) + # step_trjconv.data.generic.add_file( + # GenericData(file_name="structure.tpr", file_data=self.tpr, argument=True) + # ) + step_trjconv.execute() + out_path = os.path.join(self._test_dir, "trjcat_out.xtc") + step_trjconv.write_generic_by_extension(self._test_dir, "xtc") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 30088548) diff --git a/tests/gromacs/test_trjconv.py b/tests/gromacs/test_trjconv.py new file mode 100644 index 0000000..4969451 --- /dev/null +++ b/tests/gromacs/test_trjconv.py @@ -0,0 +1,53 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.workflow_steps.gromacs.trjconv import StepGMXTrjconv + +SGE = StepGromacsEnum() +SBE = StepBaseEnum + + +class Test_Trjconv(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/gromacs") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(PATHS_EXAMPLEDATA.GROMACS_XTC, "rb") as f: + self.xtc = f.read() + + with open(PATHS_EXAMPLEDATA.GROMACS_TPR_TRJCONV, "rb") as f: + self.tpr = f.read() + + def test_trjconv(self): + step_conf = { + SBE.STEPID: "test_trjconv", + SBE.STEP_TYPE: "trjconv", + SBE.EXEC: { + SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + SBE.SETTINGS: { + SBE.SETTINGS_ARGUMENTS_FLAGS: ["-center"], + SBE.SETTINGS_ADDITIONAL: {SBE.PIPE_INPUT: "echo -ne 1 0"}, + }, + } + + step_trjconv = StepGMXTrjconv(**step_conf) + step_trjconv.data.generic.add_file( + GenericData(file_name="structure.xtc", file_data=self.xtc, argument=True) + ) + step_trjconv.data.generic.add_file( + GenericData(file_name="structure.tpr", file_data=self.tpr, argument=True) + ) + step_trjconv.execute() + out_path = os.path.join(self._test_dir, "structure.xtc") + step_trjconv.write_generic_by_extension(self._test_dir, "xtc") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 10029516) diff --git a/tests/integration_tests/__init__.py b/tests/integration_tests/__init__.py new file mode 100644 index 0000000..862a76e --- /dev/null +++ b/tests/integration_tests/__init__.py @@ -0,0 +1,5 @@ +# from tests.integration_tests.test_fep_plus import * +from tests.integration_tests.test_docking import * +from tests.integration_tests.test_gromacs import * + +from tests.integration_tests.test_rmsd_iter import * diff --git a/tests/integration_tests/test_docking.py b/tests/integration_tests/test_docking.py new file mode 100644 index 0000000..a343b0a --- /dev/null +++ b/tests/integration_tests/test_docking.py @@ -0,0 +1,239 @@ +import unittest +import os +from tests.tests_paths import PATHS_EXAMPLEDATA, PATHS_1UYD +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.composite_agents_enums import WorkflowEnum +from 
icolos.utils.enums.step_enums import StepBaseEnum, StepGlideEnum
+
+_WE = WorkflowEnum()
+_SBE = StepBaseEnum
+_SGE = StepGlideEnum()
+
+
+class TestDockingWorkflow(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+
+        cls._test_dir = attach_root_path("tests/junk/integration")
+        if not os.path.isdir(cls._test_dir):
+            os.makedirs(cls._test_dir)
+
+    @classmethod
+    def tearDownClass(cls):
+        pass
+
+    def test_docking_workflow(self):
+
+        conf = {
+            _WE.HEADER: {
+                _WE.ID: "NIBR",
+                _WE.ENVIRONMENT: {
+                    _WE.ENVIRONMENT_EXPORT: [
+                        {
+                            _WE.ENVIRONMENT_EXPORT_KEY: "OE_LICENSE",
+                            _WE.ENVIRONMENT_EXPORT_VALUE: "/opt/scp/software/oelicense/1.0/oe_license.seq1",
+                        },
+                        {
+                            _WE.ENVIRONMENT_EXPORT_KEY: "XTBHOME",
+                            _WE.ENVIRONMENT_EXPORT_VALUE: "/projects/cc/mai/binaries/xtb-6.4.0",
+                        },
+                        {
+                            _WE.ENVIRONMENT_EXPORT_KEY: "XTBPATH",
+                            _WE.ENVIRONMENT_EXPORT_VALUE: "${XTBHOME}/share/xtb",
+                        },
+                        {
+                            _WE.ENVIRONMENT_EXPORT_KEY: "PATH",
+                            _WE.ENVIRONMENT_EXPORT_VALUE: "${PATH}:${XTBHOME}/bin",
+                        },
+                        {
+                            _WE.ENVIRONMENT_EXPORT_KEY: "PKG_CONFIG_PATH",
+                            _WE.ENVIRONMENT_EXPORT_VALUE: "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig",
+                        },
+                        {
+                            _WE.ENVIRONMENT_EXPORT_KEY: "PARA_ARCH",
+                            _WE.ENVIRONMENT_EXPORT_VALUE: "MPI",
+                        },
+                        {
+                            _WE.ENVIRONMENT_EXPORT_KEY: "PARNODES",
+                            _WE.ENVIRONMENT_EXPORT_VALUE: "6",
+                        },
+                    ]
+                },
+                _WE.GLOBAL_VARIABLES: {
+                    "smiles": "3,4-DIAMINOBENZOTRIFLUORIDE:Nc1ccc(cc1N)C(F)(F)F;aspirin:O=C(C)Oc1ccccc1C(=O)O"
+                },
+            },
+            _WE.STEPS: [
+                {
+                    _SBE.STEPID: "initialization_smile",
+                    _SBE.STEP_TYPE: "initialization",
+                    _SBE.INPUT: {
+                        _SBE.INPUT_COMPOUNDS: [
+                            {
+                                _SBE.INPUT_SOURCE: "{smiles}",
+                                _SBE.INPUT_SOURCE_TYPE: "string",
+                            }
+                        ]
+                    },
+                },
+                {
+                    _SBE.STEPID: "Ligprep",
+                    _SBE.STEP_TYPE: "ligprep",
+                    _SBE.EXEC: {
+                        _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws",
+                        _SBE.EXEC_PARALLELIZATION: {
+                            _SBE.EXEC_PARALLELIZATION_CORES: 4,
+                            _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1,
+                        },
+                        _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 3},
+                    },
+                    _SBE.SETTINGS: {
+                        _SBE.SETTINGS_ARGUMENTS: {
+                            _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-epik"],
+                            _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {
+                                "-ph": 7.0,
+                                "-pht": 1.0,
+                                "-s": 1,
+                                "-bff": 14,
+                            },
+                        },
+                        _SBE.SETTINGS_ADDITIONAL: {},
+                    },
+                    _SBE.INPUT: {
+                        _SBE.INPUT_COMPOUNDS: [
+                            {
+                                _SBE.INPUT_SOURCE: "initialization_smile",
+                                _SBE.INPUT_SOURCE_TYPE: "step",
+                            }
+                        ]
+                    },
+                    _SBE.WRITEOUT: [
+                        {
+                            _SBE.INPUT_COMPOUNDS: {
+                                _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_ENUMERATIONS
+                            },
+                            _SBE.WRITEOUT_DESTINATION: {
+                                _SBE.WRITEOUT_DESTINATION_RESOURCE: f"{self._test_dir}/nibr_ligprep.sdf",
+                                _SBE.STEP_TYPE: "file",
+                                _SBE.WRITEOUT_DESTINATION_FORMAT: "SDF",
+                            },
+                        }
+                    ],
+                },
+                {
+                    _SBE.STEPID: "Glide",
+                    _SBE.STEP_TYPE: "glide",
+                    _SBE.EXEC: {
+                        _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws",
+                        _SBE.EXEC_PARALLELIZATION: {
+                            _SBE.EXEC_PARALLELIZATION_CORES: 8,
+                            _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1,
+                        },
+                        _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 3},
+                    },
+                    _SBE.SETTINGS: {
+                        _SBE.SETTINGS_ARGUMENTS: {
+                            _SBE.SETTINGS_ARGUMENTS_FLAGS: [],
+                            _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-HOST": "localhost"},
+                        },
+                        _SBE.SETTINGS_ADDITIONAL: {
+                            "configuration": {
+                                "AMIDE_MODE": "trans",
+                                "EXPANDED_SAMPLING": "True",
+                                "GRIDFILE": [PATHS_1UYD.GRID_PATH],
+                                "NENHANCED_SAMPLING": "1",
+                                "POSE_OUTTYPE": "ligandlib_sd",
+                                "POSES_PER_LIG": "15",
+                                "POSTDOCK_NPOSE": "25",
+                                "POSTDOCKSTRAIN": "True",
+                                "PRECISION": "SP",
+                                "REWARD_INTRA_HBONDS": "True",
+                            }
+                        },
+                    },
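+                    # Glide docks the enumerations produced by the Ligprep step
+                    # (declared as input below); its conformers feed the ShaEP step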
_SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "Ligprep", + _SBE.INPUT_SOURCE_TYPE: "step", + } + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: "conformers" + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: f"{self._test_dir}/tests/junk/nibr_glide.sdf", + _SBE.WRITEOUT_DESTINATION_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "SDF", + }, + } + ], + }, + { + _SBE.STEPID: "Shaep", + _SBE.STEP_TYPE: "shaep", + _SBE.EXEC: {_SBE.EXEC_BINARYLOCATION: "/projects/cc/mai/binaries"}, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: PATHS_EXAMPLEDATA.PANTHER_NEGATIVE_IMAGE, + _SBE.INPUT_EXTENSION: "mol2", + } + ], + _SBE.INPUT_COMPOUNDS: [ + {_SBE.INPUT_SOURCE: "Glide", _SBE.INPUT_SOURCE_TYPE: "step"} + ], + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: "conformers", + _SBE.WRITEOUT_COMP_SELECTED_TAGS: [ + "shape_similarity", + "esp_similarity", + ], + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: f"{self._test_dir}/tests/junk/nibr_final_all.csv", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "CSV", + }, + }, + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: "conformers", + _SBE.WRITEOUT_COMP_SELECTED_TAGS: [ + "shape_similarity", + "esp_similarity", + ], + _SBE.WRITEOUT_COMP_AGGREGATION: { + _SBE.WRITEOUT_COMP_AGGREGATION_MODE: "best_per_compound", + _WE.ENVIRONMENT_EXPORT_KEY: "shape_similarity", + _SBE.WRITEOUT_COMP_AGGREGATION_HIGHESTISBEST: True, + }, + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: f"{self._test_dir}/nibr_final_bestpercompound.csv", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "CSV", + }, + }, + ], + }, + ], + } + + wflow = WorkFlow(**conf) + wflow.initialize() + + self.assertEqual(len(wflow.steps), 4) + + wflow.execute() + + out_path = os.path.join(self._test_dir, "nibr_final_bestpercompound.csv") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 110) diff --git a/tests/integration_tests/test_fep_plus.py b/tests/integration_tests/test_fep_plus.py new file mode 100644 index 0000000..fd695db --- /dev/null +++ b/tests/integration_tests/test_fep_plus.py @@ -0,0 +1,224 @@ +import unittest +import os +from tests.tests_paths import PATHS_EXAMPLEDATA +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.composite_agents_enums import WorkflowEnum +from icolos.utils.enums.step_enums import StepBaseEnum, StepGlideEnum, TokenGuardEnum + +_WE = WorkflowEnum() +_SBE = StepBaseEnum +_SGE = StepGlideEnum() +_TE = TokenGuardEnum() + + +class TestFEPPlusWorkflow(unittest.TestCase): + @classmethod + def setUpClass(cls): + + cls._test_dir = attach_root_path("tests/junk/integration") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + @classmethod + def tearDownClass(cls): + pass + + def test_fep_plus_workflow(self): + + conf = { + _WE.HEADER: { + _WE.ID: "Docking/FEP+ combined workflow", + _WE.DESCRIPTION: "test setup for FEP+ integration", + _WE.ENVIRONMENT: {_WE.ENVIRONMENT_EXPORT: []}, + _WE.GLOBAL_VARIABLES: { + "smiles": "3,4-DIAMINOBENZOTRIFLUORIDE:Nc1ccc(cc1N)C(F)(F)F" + }, + }, + _WE.STEPS: [ + { + _SBE.STEPID: "initialization_smile", + _SBE.STEP_TYPE: "initialization", + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "{smiles}", + _SBE.INPUT_SOURCE_TYPE: "string", + } + ] + }, + }, + { + _SBE.STEPID: 
"Ligprep", + _SBE.STEP_TYPE: "ligprep", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws", + "parallelization": {"cores": 2, "max_length_sublists": 1}, + "failure_policy": {"n_tries": 3}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-epik"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-ph": 7.0, + "-pht": 2.0, + "-s": 10, + "-bff": 14, + "-HOST": "localhost", + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + "filter_file": {"Total_charge": "!= 0"} + }, + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "initialization_smile", + _SBE.INPUT_SOURCE_TYPE: "step", + } + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_ENUMERATIONS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{entrypoint_dir}/ligprep_enums.sdf", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "SDF", + }, + } + ], + }, + { + _SBE.STEPID: "Glide", + _SBE.STEP_TYPE: "glide", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 4, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 3}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-HOST": "localhost"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "configuration": { + "AMIDE_MODE": "trans", + "EXPANDED_SAMPLING": "True", + "GRIDFILE": [PATHS_EXAMPLEDATA.PRIME_COX2_GRID], + "NENHANCED_SAMPLING": "1", + "POSE_OUTTYPE": "poseviewer", + "POSES_PER_LIG": "1", + "POSTDOCK_NPOSE": "25", + "POSTDOCKSTRAIN": "True", + "PRECISION": "SP", + "REWARD_INTRA_HBONDS": "True", + } + }, + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "Ligprep", + _SBE.INPUT_SOURCE_TYPE: "step", + } + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_COMPOUNDS: {"category": "conformers"}, + "destination": { + "resource": "{entrypoint_dir}/tests/junk/docked_conformers_cox2_actives.sdf", + _SBE.STEP_TYPE: "file", + "format": "SDF", + }, + } + ], + }, + { + _SBE.STEPID: "FEP_plus_setup", + _SBE.STEP_TYPE: "fep_plus_setup", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws" + }, + _SBE.SETTINGS: {}, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "Glide", + _SBE.INPUT_SOURCE_TYPE: "step", + "target_field": _SBE.INPUT_COMPOUNDS, + } + ] + }, + }, + { + _SBE.STEPID: "FEP_plus_exec", + _SBE.STEP_TYPE: "fep_plus_exec", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws" + }, + _SBE.TOKEN_GUARD: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws", + _SBE.EXEC_BINARYLOCATION: "ssh 10.220.1.4 /opt/schrodinger/suite/installations/default", + _TE.TG_TOKEN_POOLS: {"FEP_GPGPU": 16}, + "wait_interval_seconds": 30, + "wait_limit_seconds": 0, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-JOBNAME": "test", + "-HOST": "fep-compute", + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "Glide", + _SBE.INPUT_SOURCE_TYPE: "step", + "target_field": _SBE.INPUT_COMPOUNDS, + } + ], + "generic": [ + {_SBE.INPUT_SOURCE: "FEP_plus_setup", "extension": "fmp"} + ], + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS, + 
_SBE.WRITEOUT_COMP_SELECTED_TAGS: [ + "dG", + "docking_score", + ], + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION: os.path.join( + self._test_dir, "fep_scored_conformers.csv" + ), + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "CSV", + }, + } + ], + }, + ], + } + + wflow = WorkFlow(**conf) + wflow.initialize() + wflow.execute() + + out_path = os.path.join(self._test_dir, "fep_scored_conformers.csv") + stat_inf = os.stat(out_path) + self.assertGreaterEqual(stat_inf.st_size, 4252) diff --git a/tests/integration_tests/test_gromacs.py b/tests/integration_tests/test_gromacs.py new file mode 100644 index 0000000..aae0171 --- /dev/null +++ b/tests/integration_tests/test_gromacs.py @@ -0,0 +1,1076 @@ +import unittest +import os +from tests.tests_paths import MAIN_CONFIG, PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.composite_agents_enums import WorkflowEnum +from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum + +_WE = WorkflowEnum() +_SBE = StepBaseEnum +_SGE = StepGromacsEnum() + + +class Test_MD_Fpocket(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/integration") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def test_workflow_MD_fpocket_holo(self): + """ + run a full gromacs MD workflow from a pdb structure, then fpocket on the resulting trajectory + MDPocket is run on the holo structure + """ + + conf = { + _WE.HEADER: { + _WE.ID: "gromacs_test", + _WE.DESCRIPTION: "full md run with gromacs, with MDpocket run to extract descriptors for binding pocket", + _WE.ENVIRONMENT: { + _WE.ENVIRONMENT_EXPORT: [ + { + _WE.ENVIRONMENT_EXPORT_KEY: "GMX_GPU_DD_COMMS", + _WE.ENVIRONMENT_EXPORT_VALUE: "True", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "GMX_GPU_PME_PP_COMMS", + _WE.ENVIRONMENT_EXPORT_VALUE: "True", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "GMX_FORCE_UPDATE_DEFAULT_GPU", + _WE.ENVIRONMENT_EXPORT_VALUE: "True", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "GMXLIB", + _WE.ENVIRONMENT_EXPORT_VALUE: "/forcefields/", + }, + ] + }, + _WE.GLOBAL_VARIABLES: { + "root_dir": "/icolos", + "file_base": os.path.join( + MAIN_CONFIG["ICOLOS_TEST_DATA"], "gromacs/protein" + ), + "output_dir": attach_root_path("tests/junk/integration"), + }, + }, + _WE.STEPS: [ + { + _SBE.STEPID: "01_pdb2gmx", + _SBE.STEP_TYPE: "pdb2gmx", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-ignh"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-water": "tip4p", + "-ff": "amber14sb_OL15", + }, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: attach_root_path( + PATHS_EXAMPLEDATA.MDPOCKET_PDB_FILE_DRY + ), + _SBE.INPUT_EXTENSION: "pdb", + } + ] + }, + }, + { + _SBE.STEPID: "02_editconf", + _SBE.STEP_TYPE: "editconf", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-d": "1.5", + "-bt": "dodecahedron", + }, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "gro", + } + ] + }, + }, + { + _SBE.STEPID: "03_solvate", + 
_SBE.STEP_TYPE: "solvate", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-cs": "tip4p"}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "02_editconf", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "top", + }, + ] + }, + }, + { + _SBE.STEPID: "04_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": False, + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "03_solvate", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/ions.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "03_solvate", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "05_genion", + _SBE.STEP_TYPE: "genion", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-neutral"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-pname": "NA", + "-nname": "CL", + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + "pipe_input": "SOL", + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "04_grompp", + _SBE.INPUT_EXTENSION: "tpr", + }, + { + _SBE.INPUT_SOURCE: "04_grompp", + _SBE.INPUT_EXTENSION: "top", + }, + ] + }, + }, + { + _SBE.STEPID: "06_grompp_eminim", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": False, + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/minim.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "07_eminim_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "06_grompp_eminim", + _SBE.INPUT_EXTENSION: "tpr", + } + ] + }, + }, + { + _SBE.STEPID: "08_nvt_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-n": "index.ndx"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": True, + "make_ndx_command": "auto", + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "07_eminim_mdrun", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "top", + }, + { + 
_SBE.INPUT_SOURCE: "{file_base}/nvt_equil.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "09_nvt_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "08_nvt_grompp", + _SBE.INPUT_EXTENSION: "tpr", + } + ] + }, + }, + { + _SBE.STEPID: "10_npt_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-n": "index.ndx"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": True, + "make_ndx_command": "auto", + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "09_nvt_mdrun", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/npt_equil.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "11_npt_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "10_npt_grompp", + _SBE.INPUT_EXTENSION: "tpr", + } + ] + }, + }, + { + _SBE.STEPID: "12_prod_md_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-n": "index.ndx"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": False, + "make_ndx_command": "auto", + "fields": {"nsteps": "5000"}, + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "11_npt_mdrun", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/md.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "13_prod_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-nb": "gpu", + "-bonded": "gpu", + "-pme": "gpu", + "-c": "structure.pdb", + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "12_prod_md_grompp", + _SBE.INPUT_EXTENSION: "tpr", + } + ] + }, + "writeout": [ + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "xtc"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1.xtc", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + }, + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "log"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1.log", + 
_SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + }, + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "gro"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1.gro", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + }, + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "tpr"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1.tpr", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + }, + ], + }, + { + _SBE.STEPID: "14_trjconv", + _SBE.STEP_TYPE: "trjconv", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-center"] + }, + _SBE.SETTINGS_ADDITIONAL: {"pipe_input": "echo -ne 1 0"}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "13_prod_mdrun", + _SBE.INPUT_EXTENSION: "xtc", + }, + { + _SBE.INPUT_SOURCE: "13_prod_mdrun", + _SBE.INPUT_EXTENSION: "tpr", + }, + ] + }, + "writeout": [ + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "xtc"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1_trjconv.xtc", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + } + ], + }, + { + _SBE.STEPID: "15_MDpocket", + _SBE.STEP_TYPE: "mdpocket", + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load fpocket"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {} + }, + _SBE.SETTINGS_ADDITIONAL: {"format": "gromacs"}, + _SBE.SETTINGS_ADDITIONAL: {"format": "gromacs"}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "14_trjconv", + _SBE.INPUT_EXTENSION: "xtc", + }, + { + _SBE.INPUT_SOURCE: "13_prod_mdrun", + _SBE.INPUT_EXTENSION: "pdb", + }, + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.WRITEOUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "pdb"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + _SBE.WRITEOUT_DESTINATION_MODE: "dir", + }, + } + ], + }, + ], + } + export_unit_test_env_vars() + wflow = WorkFlow(**conf) + wflow.initialize() + + self.assertEqual(len(wflow.steps), 15) + + wflow.execute() + + out_path = os.path.join(self._test_dir, "md_0_1_0.xtc") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 62400) + + def test_md_ligparam(self): + + conf = { + _WE.HEADER: { + _WE.ID: "gromacs_test_ligparam", + _WE.DESCRIPTION: "full md run with gromacs, with ligand parametrisation", + _WE.ENVIRONMENT: { + _WE.ENVIRONMENT_EXPORT: [ + { + _WE.ENVIRONMENT_EXPORT_KEY: "GMX_GPU_DD_COMMS", + _WE.ENVIRONMENT_EXPORT_VALUE: "True", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "GMX_GPU_PME_PP_COMMS", + _WE.ENVIRONMENT_EXPORT_VALUE: "True", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "GMX_FORCE_UPDATE_DEFAULT_GPU", + _WE.ENVIRONMENT_EXPORT_VALUE: "True", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "ACPYPE", + _WE.ENVIRONMENT_EXPORT_VALUE: "/projects/cc/mai/binaries/acpype", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "GMXLIB", + _WE.ENVIRONMENT_EXPORT_VALUE: "/gmx_workflow/forcefields/", + }, + ] + }, + _WE.GLOBAL_VARIABLES: { + "forcefield": "/gmx_workflow/forcefields/amber14sb_OL15.ff", + "output_dir": attach_root_path("tests/junk/integration"), + "file_base": PATHS_EXAMPLEDATA.GROMACS_PROTEIN_FILE_BASE, + }, + }, + _WE.STEPS: [ + { + _SBE.STEPID: "01_pdb2gmx", + _SBE.STEP_TYPE: "pdb2gmx", + _SBE.EXEC: { + 
_SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-ignh"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-water": "tip4p", + "-ff": "amber14sb_OL15", + }, + }, + _SBE.SETTINGS_ADDITIONAL: {"forcefield": "{forcefield}"}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: attach_root_path( + PATHS_EXAMPLEDATA.GROMACS_HOLO_STRUCTURE + ), + _SBE.INPUT_EXTENSION: "pdb", + } + ] + }, + }, + { + _SBE.STEPID: "02_editconf", + _SBE.STEP_TYPE: "editconf", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-d": "1.5", + "-bt": "dodecahedron", + }, + }, + _SBE.SETTINGS_ADDITIONAL: {"forcefield": "{forcefield}"}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "gro", + } + ] + }, + }, + { + _SBE.STEPID: "03_solvate", + _SBE.STEP_TYPE: "solvate", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-cs": "tip4p"}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "02_editconf", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "top", + }, + ] + }, + }, + { + _SBE.STEPID: "04_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": False, + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "03_solvate", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/ions.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "03_solvate", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "05_genion", + _SBE.STEP_TYPE: "genion", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-neutral"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-pname": "NA", + "-nname": "CL", + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + "pipe_input": "SOL", + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "04_grompp", + _SBE.INPUT_EXTENSION: "tpr", + }, + { + _SBE.INPUT_SOURCE: "04_grompp", + _SBE.INPUT_EXTENSION: "top", + }, + ] + }, + }, + { + _SBE.STEPID: "06_grompp_eminim", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": False, + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/minim.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + 
_SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "07_eminim_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "06_grompp_eminim", + _SBE.INPUT_EXTENSION: "tpr", + } + ] + }, + }, + { + _SBE.STEPID: "08_nvt_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-n": "index.ndx"}, + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-n": "index.ndx"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": True, + "make_ndx_command": "auto", + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "07_eminim_mdrun", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/nvt_equil.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "09_nvt_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "08_nvt_grompp", + _SBE.INPUT_EXTENSION: "tpr", + } + ] + }, + }, + { + _SBE.STEPID: "10_npt_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-n": "index.ndx"}, + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"-n": "index.ndx"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "-r": True, + "make_ndx_command": "auto", + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "09_nvt_mdrun", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/npt_equil.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "11_npt_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "10_npt_grompp", + _SBE.INPUT_EXTENSION: "tpr", + } + ] + }, + }, + { + _SBE.STEPID: "12_prod_md_grompp", + _SBE.STEP_TYPE: "grompp", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-n": "index.ndx", + }, + }, + _SBE.SETTINGS_ADDITIONAL: { 
+ "-r": False, + "fields": {"nsteps": "5000"}, + "make_ndx_command": "auto", + "fields": {"nsteps": "5000"}, + "make_ndx_command": "auto", + }, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "11_npt_mdrun", + _SBE.INPUT_EXTENSION: "gro", + }, + { + _SBE.INPUT_SOURCE: "05_genion", + _SBE.INPUT_EXTENSION: "top", + }, + { + _SBE.INPUT_SOURCE: "{file_base}/md.mdp", + _SBE.INPUT_EXTENSION: "mdp", + }, + { + _SBE.INPUT_SOURCE: "01_pdb2gmx", + _SBE.INPUT_EXTENSION: "itp", + }, + ] + }, + }, + { + _SBE.STEPID: "13_prod_mdrun", + _SBE.STEP_TYPE: "mdrun", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-nb": "gpu", + "-bonded": "gpu", + "-pme": "gpu", + "-c": "structure.pdb", + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "12_prod_md_grompp", + _SBE.INPUT_EXTENSION: "tpr", + } + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "xtc"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1.xtc", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + }, + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "log"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1.log", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + }, + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "gro"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1.gro", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + }, + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "tpr"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1.tpr", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + }, + ], + }, + { + _SBE.STEPID: "14_trjconv", + _SBE.STEP_TYPE: "trjconv", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-center"] + }, + _SBE.SETTINGS_ADDITIONAL: {"pipe_input": "echo -ne 1 0"}, + }, + _SBE.INPUT: { + _SBE.INPUT_GENERIC: [ + { + _SBE.INPUT_SOURCE: "13_prod_mdrun", + _SBE.INPUT_EXTENSION: "xtc", + }, + { + _SBE.INPUT_SOURCE: "13_prod_mdrun", + _SBE.INPUT_EXTENSION: "tpr", + }, + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "xtc"}, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "{output_dir}/md_0_1_trjconv.xtc", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "TXT", + }, + } + ], + }, + ], + } + + export_unit_test_env_vars() + wflow = WorkFlow(**conf) + wflow.initialize() + + self.assertEqual(len(wflow.steps), 14) + + wflow.execute() + + out_path = os.path.join(self._test_dir, "md_0_1_0.xtc") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 324000) diff --git a/tests/integration_tests/test_rmsd_iter.py b/tests/integration_tests/test_rmsd_iter.py new file mode 100644 index 0000000..6556e3e --- /dev/null +++ b/tests/integration_tests/test_rmsd_iter.py @@ -0,0 +1,344 @@ +import unittest +import os +from tests.tests_paths import PATHS_1UYD +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.composite_agents_enums import 
WorkflowEnum +from icolos.utils.enums.step_enums import StepBaseEnum, StepGlideEnum + + +_WE = WorkflowEnum() +_SBE = StepBaseEnum +_SGE = StepGlideEnum() + + +class TestDockingWorkflow(unittest.TestCase): + @classmethod + def setUpClass(cls): + + cls._test_dir = attach_root_path("tests/junk/integration") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + @classmethod + def tearDownClass(cls): + pass + + def test_iterator_workflow(self): + """ + Runs the RMSD-corrected docking workflow using multiple xtb settings in parallel + """ + + conf = { + _WE.HEADER: { + _WE.ID: "RMSD_rescoring", + _WE.DESCRIPTION: "Run RMSD rescoring on docking pose", + _WE.ENVIRONMENT: { + _WE.ENVIRONMENT_EXPORT: [ + { + _WE.ENVIRONMENT_EXPORT_KEY: "OE_LICENSE", + _WE.ENVIRONMENT_EXPORT_VALUE: "/opt/scp/software/oelicense/1.0/oe_license.seq1", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "XTBHOME", + _WE.ENVIRONMENT_EXPORT_VALUE: "/projects/cc/mai/binaries/xtb-6.4.0", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "XTBPATH", + _WE.ENVIRONMENT_EXPORT_VALUE: "${XTBHOME}/share/xtb", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "PATH", + _WE.ENVIRONMENT_EXPORT_VALUE: "${PATH}:${XTBHOME}/bin", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "PKG_CONFIG_PATH", + _WE.ENVIRONMENT_EXPORT_VALUE: "${PKG_CONFIG_PATH}:${XTBHOME}/lib64/pkgconfig", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "PARA_ARCH", + _WE.ENVIRONMENT_EXPORT_VALUE: "MPI", + }, + { + _WE.ENVIRONMENT_EXPORT_KEY: "PARNODES", + _WE.ENVIRONMENT_EXPORT_VALUE: "6", + }, + ] + }, + _WE.GLOBAL_VARIABLES: { + "smiles": "3,4-DIAMINOBENZOTRIFLUORIDE:Nc1ccc(cc1N)C(F)(F)F" + }, + }, + _WE.STEPS: [ + { + _SBE.STEPID: "initialization_smile", + _SBE.STEP_TYPE: "initialization", + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "{smiles}", + _SBE.INPUT_SOURCE_TYPE: "string", + } + ] + }, + }, + { + _SBE.STEPID: "Ligprep", + _SBE.STEP_TYPE: "ligprep", + _SBE.EXEC: { + "prefix_execution": "module load schrodinger/2020-4", + "parallelization": {"cores": 2, "max_length_sublists": 1}, + "failure_policy": {"n_tries": 3}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-epik"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-ph": 7.0, + "-pht": 2.0, + "-s": 10, + "-bff": 14, + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + "filter_file": {"Total_charge": "!= 0"} + }, + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "initialization_smile", + _SBE.INPUT_SOURCE_TYPE: "step", + } + ] + }, + }, + { + _SBE.STEPID: "Glide", + _SBE.STEP_TYPE: "glide", + _SBE.EXEC: { + "prefix_execution": "module load schrodinger/2020-4", + "parallelization": {"cores": 4, "max_length_sublists": 1}, + "failure_policy": {"n_tries": 3}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + "configuration": { + "AMIDE_MODE": "trans", + "EXPANDED_SAMPLING": "True", + "GRIDFILE": [PATHS_1UYD.GRID_PATH], + "NENHANCED_SAMPLING": "1", + "POSE_OUTTYPE": "ligandlib_sd", + "POSES_PER_LIG": "3", + "POSTDOCK_NPOSE": "25", + "POSTDOCKSTRAIN": "True", + "PRECISION": "SP", + "REWARD_INTRA_HBONDS": "True", + } + }, + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "Ligprep", + _SBE.INPUT_SOURCE_TYPE: "step", + } + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: "conformers" + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: 
"tests/junk/integration/rmsd_rescoring_docked_conformers.sdf", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "SDF", + }, + }, + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: "conformers", + _SBE.WRITEOUT_COMP_SELECTED_TAGS: [ + "docking_score", + "grid_id", + ], + _SBE.WRITEOUT_COMP_AGGREGATION: { + _SBE.WRITEOUT_COMP_AGGREGATION_MODE: "best_per_compound", + _WE.ENVIRONMENT_EXPORT_KEY: "docking_score", + _SBE.WRITEOUT_COMP_AGGREGATION_HIGHESTISBEST: False, + }, + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "tests/junk/integration/rmsd_rescoring_docked_conformers.csv", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "CSV", + }, + }, + ], + }, + { + _SBE.STEPID: "compound_filter", + _SBE.STEP_TYPE: "filter", + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: { + "filter_level": _SBE.INPUT_COMPOUNDS, + "criteria": "docking_score", + "return_n": 1, + "highest_is_best": False, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + {_SBE.INPUT_SOURCE: "Glide", _SBE.INPUT_SOURCE_TYPE: "step"} + ] + }, + }, + { + _SBE.STEPID: "test_iterator", + _SBE.STEP_TYPE: "iterator", + "base_config": [ + { + _SBE.STEPID: "xtb", + _SBE.STEP_TYPE: "xtb", + _SBE.EXEC: { + "binary_location": "/projects/cc/mai/binaries/xtb-6.4.0", + "parallelization": {"cores": 4}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "--gbsa": "h2o" + }, + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "compound_filter", + _SBE.INPUT_SOURCE_TYPE: "step", + } + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: "conformers" + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "tests/junk/rmsd_rescoring_xtb.sdf", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "SDF", + }, + } + ], + }, + { + _SBE.STEPID: "data_manipulation", + _SBE.STEP_TYPE: "data_manipulation", + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: { + "action": "attach_conformers_as_extra", + _SBE.INPUT_SOURCE: "xtb", + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "compound_filter", + _SBE.INPUT_SOURCE_TYPE: "step", + } + ] + }, + }, + { + _SBE.STEPID: "rmsd", + _SBE.STEP_TYPE: "rmsd", + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: {"method": "alignmol"} + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "data_manipulation", + _SBE.INPUT_SOURCE_TYPE: "step", + } + ] + }, + _SBE.WRITEOUT: [ + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: "conformers" + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "tests/junk/integration/rmsd_rescoring.sdf", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "SDF", + }, + }, + { + _SBE.INPUT_COMPOUNDS: { + _SBE.WRITEOUT_COMP_CATEGORY: "conformers", + _SBE.WRITEOUT_COMP_SELECTED_TAGS: [ + "docking_score", + "rmsd", + "grid_id", + ], + _SBE.WRITEOUT_COMP_AGGREGATION: { + _SBE.WRITEOUT_COMP_AGGREGATION_MODE: "best_per_compound", + _WE.ENVIRONMENT_EXPORT_KEY: "docking_score", + }, + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_RESOURCE: "tests/junk/integration/rmsd_rescoring.csv", + _SBE.STEP_TYPE: "file", + _SBE.WRITEOUT_DESTINATION_FORMAT: "CSV", + }, + }, + ], + }, + ], + "iter_settings": { + _SBE.SETTINGS: { + "xtb": { + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "--opt": [ + "vtight", + "vtight", + "vtight", + "vtight", + "vtight", + "vtight", + "vtight", + "tight", + ] + } + } + }, + 
"n_iters": 8, + "iter_mode": "single", + "parallelizer_settings": { + "parallelize": True, + "cores": 8, + "max_length_sublists": 3, + }, + }, + }, + ], + } + + wflow = WorkFlow(**conf) + wflow.initialize() + + self.assertEqual(len(wflow.steps), 5) + + wflow.execute() + + out_path = os.path.join(self._test_dir, "run_0/rmsd_rescoring.csv") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 82) diff --git a/tests/io/__init__.py b/tests/io/__init__.py new file mode 100644 index 0000000..123d3ff --- /dev/null +++ b/tests/io/__init__.py @@ -0,0 +1,3 @@ +from tests.io.test_initialize_compound import * +from tests.io.test_embedder import * +from tests.io.test_data_manipulation import Test_DataManipulation diff --git a/tests/io/test_data_manipulation.py b/tests/io/test_data_manipulation.py new file mode 100644 index 0000000..905ace1 --- /dev/null +++ b/tests/io/test_data_manipulation.py @@ -0,0 +1,325 @@ +import unittest +from copy import deepcopy + +from rdkit.Geometry.rdGeometry import Point3D + +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.core.containers.compound import Compound, Conformer, Enumeration +from tests.tests_paths import PATHS_1UYD +from icolos.core.containers.generic import GenericData +from icolos.core.workflow_steps.io.data_manipulation import StepDataManipulation +from icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.composite_agents_enums import WorkflowEnum +from rdkit.Chem import SDMolSupplier + +from icolos.utils.enums.step_enums import ( + StepBaseEnum, + StepDataManipulationEnum, + StepFilterEnum, +) +from icolos.utils.general.files_paths import attach_root_path, empty_output_dir +import os +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + export_unit_test_env_vars, + get_ligands_as_compounds_with_conformers, + get_mol_as_Conformer, +) + +_SBE = StepBaseEnum +_SDM = StepDataManipulationEnum() +_WE = WorkflowEnum() +_SFE = StepFilterEnum() + + +class Test_DataManipulation(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/data_manip") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def _get_step_filter_compounds(self): + # produce the compounds object for testing + # return 5 dummy compounds with 1 enumeration and 5 conformers per enumeration + mols = SDMolSupplier(attach_root_path(PATHS_1UYD.LIGANDS)) + mol = mols[0] + compounds = [] + for i in range(5): + compound = Compound(name=str(i), compound_number=i) + enum = Enumeration() + for i in range(5): + conf = Conformer(conformer=mol, conformer_id=i, enumeration_object=enum) + enum.add_conformer(conformer=conf) + compound.add_enumeration(enum) + compounds.append(compound) + return compounds + + def setUp(self): + self._compounds = self._get_step_filter_compounds() + # dG score gets gradually worse, prime gets gradually worse during the conformer walk + dG_value = -13 + prime_value = -2900 + for compound in self._compounds: + for enum in compound.get_enumerations(): + for conformer in enum.get_conformers(): + conformer.get_molecule().SetProp("dG", str(dG_value)) + conformer.get_molecule().SetProp( + "r_psp_MMGBSA_dG_Bind", str(prime_value) + ) + dG_value += 0.2 + prime_value -= 10 + # remove files from previous runs + empty_output_dir(self._test_dir) + + with open(PATHS_EXAMPLEDATA.PRIME_RECEPTOR_COX2, "r") as f: + data = f.read() + self.complex_conformers = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.LIGANDS_1UYD + ) + 
self.mae_file = GenericData(file_name="structure.mae", file_data=data) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + + # Compound 1 with 1 enumeration and 11 conformers + self.comp1 = Compound(compound_number=1) + self.comp1.add_enumeration(Enumeration(), auto_update=True) + self.comp1[0].add_conformers(deepcopy(conformers), auto_update=True) + + # Compound 2 with 1 enumeration and 11 conformers, change of some coordinates + self.comp2 = Compound(compound_number=1) + self.comp2.add_enumeration(Enumeration(), auto_update=True) + self.comp2[0].add_conformers(deepcopy(conformers), auto_update=True) + self.comp2[0][1].get_molecule().GetConformer().SetAtomPosition( + 0, Point3D(-4.2239, -0.441, 0.2458) + ) + self.comp2[0][7].get_molecule().GetConformer().SetAtomPosition( + 0, Point3D(-1.5442, -0.7854, 0.5883) + ) + + # workflow (necessary to pass on data) + conf = { + _WE.HEADER: {_WE.ID: "test_workflow", _WE.ENVIRONMENT: {}}, + _WE.STEPS: [], + } + self.workflow = WorkFlow(**conf) + + # dummy step + step_conf = { + _SBE.STEPID: "01_dummy", + _SBE.STEP_TYPE: "dummy", + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + } + dummy_step = StepBase(**step_conf) + dummy_step.get_compounds().append(self.comp2) + dummy_step.set_workflow_object(self.workflow) + self.workflow.add_step(dummy_step) + + @classmethod + def tearDownClass(cls): + pass + + def test_matching(self): + step_conf = { + _SBE.STEPID: "01_data_manip", + _SBE.STEP_TYPE: _SBE.STEP_DATA_MANIPULATION, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SDM.ACTION: _SDM.ACTION_ATTACH_CONFORMERS_AS_EXTRA + }, + }, + } + manip_step = StepDataManipulation(**step_conf) + manip_step.get_compounds().append(self.comp1) + manip_step.set_workflow_object(self.workflow) + self.workflow.add_step(manip_step) + + manip_step.settings.additional[_SDM.MATCH_SOURCE] = "01_dummy" + manip_step.execute() + + self.assertEqual( + manip_step.get_compounds()[0][0][2] + .get_extra_data()[_SDM.KEY_MATCHED][0] + .get_index_string(), + "1:0:2", + ) + + def test_no_action(self): + step_conf = { + _SBE.STEPID: "01_data_manip", + _SBE.STEP_TYPE: _SBE.STEP_DATA_MANIPULATION, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {_SDM.ACTION: _SDM.ACTION_NO_ACTION}, + }, + } + manip_step = StepDataManipulation(**step_conf) + manip_step.get_compounds().append(self.comp1) + manip_step.set_workflow_object(self.workflow) + self.workflow.add_step(manip_step) + + manip_step.settings.additional[_SDM.MATCH_SOURCE] = "01_dummy" + manip_step.execute() + + self.assertEqual(len(manip_step.get_compounds()[0][0]), 11) + + def test_convert_mae2pdb(self): + step_conf = { + _SBE.STEPID: "01_data_manip", + _SBE.STEP_TYPE: _SBE.STEP_DATA_MANIPULATION, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {_SDM.ACTION: _SDM.CONVERT_MAE_TO_PDB}, + }, + } + manip_step = StepDataManipulation(**step_conf) + manip_step.set_workflow_object(self.workflow) + manip_step.data.generic.add_file(self.mae_file) + self.workflow.add_step(manip_step) + + manip_step.execute() + out_path = os.path.join(self._test_dir, "structure.pdb") + 
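+        # flush the step's generic files with the given extension to disk; the
+        # size floor below guards against truncated output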
+        manip_step.write_generic_by_extension(self._test_dir, "pdb")
+        stat_inf = os.stat(out_path)
+        self.assertGreater(stat_inf.st_size, 734400)
+
+    def test_get_complexes(self):
+        step_conf = {
+            _SBE.STEPID: "01_data_manip",
+            _SBE.STEP_TYPE: _SBE.STEP_DATA_MANIPULATION,
+            _SBE.SETTINGS: {
+                _SBE.SETTINGS_ARGUMENTS: {
+                    _SBE.SETTINGS_ARGUMENTS_FLAGS: [],
+                    _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {},
+                },
+                _SBE.SETTINGS_ADDITIONAL: {
+                    _SDM.ACTION: _SDM.ASSEMBLE_COMPLEXES,
+                    _SDM.RECEPTOR: PATHS_EXAMPLEDATA.RECEPTOR_1UYD,
+                },
+            },
+        }
+        manip_step = StepDataManipulation(**step_conf)
+        manip_step.data.compounds = self.complex_conformers
+        manip_step.set_workflow_object(self.workflow)
+        self.workflow.add_step(manip_step)
+
+        manip_step.execute()
+        out_path = os.path.join(self._test_dir, "0:0:0.pdb")
+        manip_step.write_generic_by_extension(self._test_dir, "pdb")
+        stat_inf = os.stat(out_path)
+        self.assertGreater(stat_inf.st_size, 509600)
+
+    def test_filtering(self):
+        step_conf = {
+            _SBE.STEPID: "01_filtering",
+            _SBE.STEP_TYPE: _SBE.STEP_DATA_MANIPULATION,
+            _SBE.SETTINGS: {
+                _SBE.SETTINGS_ADDITIONAL: {
+                    _SDM.ACTION: _SDM.FILTER,
+                    _SFE.FILTER_LEVEL: "enumerations",
+                    _SFE.CRITERIA: "dG",
+                    _SFE.RETURN_N: 3,
+                    _SFE.HIGHEST_IS_BEST: False,
+                }
+            },
+        }
+
+        step_filter = StepDataManipulation(**step_conf)
+        step_filter.data.compounds = self._compounds
+
+        step_filter.execute()
+        dG_max = (
+            step_filter.data.compounds[0]
+            .get_enumerations()[0]
+            .get_conformers()[0]
+            .get_molecule()
+            .GetProp("dG")
+        )
+        step_filter.write_conformers(
+            path=os.path.join(self._test_dir, "filtered_confs.sdf")
+        )
+        out_path = os.path.join(self._test_dir, "filtered_confs.sdf")
+        stat_inf = os.stat(out_path)
+        self.assertEqual(stat_inf.st_size, 39708)
+        self.assertEqual(int(dG_max), -13)
+
+    def test_combined_filtering(self):
+        # filter based on a sum of two criteria attached to each conformer
+        step_conf = {
+            _SBE.STEPID: "01_filtering",
+            _SBE.STEP_TYPE: _SBE.STEP_DATA_MANIPULATION,
+            _SBE.SETTINGS: {
+                _SBE.SETTINGS_ADDITIONAL: {
+                    _SDM.ACTION: _SDM.FILTER,
+                    _SFE.FILTER_LEVEL: "enumerations",
+                    _SFE.CRITERIA: ["dG", "r_psp_MMGBSA_dG_Bind"],
+                    _SFE.RETURN_N: 3,
+                    _SFE.HIGHEST_IS_BEST: False,
+                    _SFE.AGGREGATION: "sum",
+                }
+            },
+        }
+
+        step_filter = StepDataManipulation(**step_conf)
+        step_filter.data.compounds = self._compounds
+        step_filter.execute()
+
+        dG_bind_max = (
+            step_filter.data.compounds[0]
+            .get_enumerations()[0]
+            .get_conformers()[0]
+            .get_molecule()
+            .GetProp("r_psp_MMGBSA_dG_Bind")
+        )
+        # check we can get single values back properly
+        self.assertEqual(int(dG_bind_max), -2900)
+        self.assertEqual(len(step_filter.data.compounds), 5)
+        self.assertEqual(len(step_filter.data.compounds[0][0].get_conformers()), 3)
+
+    def test_combined_filtering_compound_level(self):
+        # filter at the compound level
+        step_conf = {
+            _SBE.STEPID: "01_filtering",
+            _SBE.STEP_TYPE: _SBE.STEP_DATA_MANIPULATION,
+            _SBE.SETTINGS: {
+                _SBE.SETTINGS_ADDITIONAL: {
+                    _SDM.ACTION: _SDM.FILTER,
+                    _SFE.FILTER_LEVEL: "compounds",
+                    _SFE.CRITERIA: ["dG", "r_psp_MMGBSA_dG_Bind"],
+                    _SFE.RETURN_N: 3,
+                    _SFE.HIGHEST_IS_BEST: False,
+                    _SFE.AGGREGATION: "sum",
+                }
+            },
+        }
+
+        step_filter = StepDataManipulation(**step_conf)
+        step_filter.data.compounds = self._compounds
+        step_filter.execute()
+
+        dG_bind_max = (
+            step_filter.data.compounds[0]
+            .get_enumerations()[0]
+            .get_conformers()[0]
+            .get_molecule()
+            .GetProp("r_psp_MMGBSA_dG_Bind")
+        )
+        # check we can get single values back properly
+        self.assertEqual(int(dG_bind_max),
-2900) diff --git a/tests/io/test_embedder.py b/tests/io/test_embedder.py new file mode 100644 index 0000000..7ad60ae --- /dev/null +++ b/tests/io/test_embedder.py @@ -0,0 +1,135 @@ +import unittest + +from icolos.core.workflow_steps.io.embedder import StepEmbedding +from icolos.utils.enums.step_enums import StepBaseEnum, StepEmbeddingEnum + +from tests.tests_paths import PATHS_EXAMPLEDATA + +_SBE = StepBaseEnum +_SEE = StepEmbeddingEnum() + + +class Test_Embedder(unittest.TestCase): + @classmethod + def setUpClass(cls): + pass + + def setUp(self): + self._SMI_path = PATHS_EXAMPLEDATA.MEDIUM_MOLECULES_SMI_PATH + + @classmethod + def tearDownClass(cls): + pass + + def test_embed_with_RDkit_no_protonation(self): + step_conf = { + _SBE.STEPID: "01_embed_molecule", + _SBE.STEP_TYPE: _SBE.STEP_EMBEDDING, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _SEE.RDKIT_PROTONATE: False, + _SEE.METHOD: _SEE.METHOD_RDKIT, + } + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: self._SMI_path, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SMI, + } + ] + }, + } + init_step = StepEmbedding(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 2) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 0) + + self.assertListEqual( + list( + init_step.get_compounds()[0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.9314762660385534, 0.06628711293694872, 4.923008037397455], + ) + self.assertListEqual( + list( + init_step.get_compounds()[1][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.6176730474256593, 0.37859007619202606, 0.6065857814585477], + ) + self.assertEqual( + 22, init_step.get_compounds()[0][0].get_molecule().GetNumAtoms() + ) + + def test_embed_with_RDkit_protonation(self): + step_conf = { + _SBE.STEPID: "01_embed_molecule", + _SBE.STEP_TYPE: _SBE.STEP_EMBEDDING, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _SEE.RDKIT_PROTONATE: True, + _SEE.METHOD: _SEE.METHOD_RDKIT, + } + } + }, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: self._SMI_path, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SMI, + } + ] + }, + } + init_step = StepEmbedding(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 2) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 0) + + self.assertListEqual( + list( + init_step.get_compounds()[0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.9314762660385534, 0.06628711293694872, 4.923008037397455], + ) + self.assertListEqual( + list( + init_step.get_compounds()[1][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.6176730474256593, 0.37859007619202606, 0.6065857814585477], + ) + self.assertEqual( + 41, init_step.get_compounds()[0][0].get_molecule().GetNumAtoms() + ) + self.assertListEqual( + list( + init_step.get_compounds()[0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[40] + ), + [-3.576148794943472, -0.8051119546399829, -0.9424118920903588], + ) diff --git a/tests/io/test_initialize_compound.py b/tests/io/test_initialize_compound.py new file mode 100644 index 0000000..ea2e49f --- /dev/null +++ 
b/tests/io/test_initialize_compound.py @@ -0,0 +1,240 @@ +import unittest + +from icolos.core.workflow_steps.io.initialize_compound import StepInitializeCompound +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars + +_SBE = StepBaseEnum + + +class Test_InitializeCompound(unittest.TestCase): + @classmethod + def setUpClass(cls): + export_unit_test_env_vars() + + def setUp(self): + self._paracetamol_path = PATHS_EXAMPLEDATA.PARACETAMOL_PATH + self._SMI_path = PATHS_EXAMPLEDATA.MEDIUM_MOLECULES_SMI_PATH + self._JSON_path = PATHS_EXAMPLEDATA.SMALL_MOLECULES_JSON_PATH + self._CSV_path = PATHS_EXAMPLEDATA.SMALL_MOLECULES_CSV_PATH + self._CSV_path_semicolon = ( + PATHS_EXAMPLEDATA.SMALL_MOLECULES_CSV_PATH_DELIMITER_SEMICOLON + ) + + @classmethod + def tearDownClass(cls): + pass + + def test_initialize_compound_SDF(self): + step_conf = { + _SBE.STEPID: "01_load_molecule", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: self._paracetamol_path, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SDF, + } + ] + }, + } + init_step = StepInitializeCompound(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 1) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 1) + + self.assertListEqual( + list( + init_step.get_compounds()[0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-3.8276, -1.0625, 0.3279], + ) + + def test_initialize_compound_SMI(self): + step_conf = { + _SBE.STEPID: "01_load_molecule", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: self._SMI_path, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_SMI, + } + ] + }, + } + init_step = StepInitializeCompound(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 2) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 0) + + self.assertEqual( + init_step.get_compounds()[0][0].get_smile(), + "CN(C)CCn1cc(c2ccc(F)cc2)c(n1)n3cccc3", + ) + self.assertEqual(init_step.get_compounds()[1].get_name(), "mol7") + + def test_initialize_compound_JSON(self): + step_conf = { + _SBE.STEPID: "01_load_molecule", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: self._JSON_path, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_FORMAT: _SBE.FORMAT_JSON, + } + ] + }, + } + init_step = StepInitializeCompound(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 3) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 0) + + self.assertEqual( + init_step.get_compounds()[0][0].get_smile(), + "C#CCCCn1c(Cc2cc(OC)c(OC)c(OC)c2Cl)nc2c(N)ncnc21", + ) + self.assertEqual(init_step.get_compounds()[1].get_name(), "1") + + def test_initialize_compound_smile(self): + step_conf = { + _SBE.STEPID: "01_load_molecule", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "abc:CN(C)CCn1cc(c2ccc(F)cc2)c(n1)n3cccc3;CN(C)CCn1cc(c2ccc(F)cc2)c(n1)n3cccc3", + _SBE.INPUT_SOURCE_TYPE: 
_SBE.INPUT_SOURCE_TYPE_STRING, + } + ] + }, + } + init_step = StepInitializeCompound(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 2) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 0) + + self.assertEqual( + init_step.get_compounds()[0][0].get_smile(), + "CN(C)CCn1cc(c2ccc(F)cc2)c(n1)n3cccc3", + ) + self.assertEqual(init_step.get_compounds()[0].get_name(), "abc") + self.assertEqual(init_step.get_compounds()[1].get_name(), "1") + self.assertEqual(init_step.get_compounds()[1].get_compound_number(), 1) + + def test_initialize_compound_smile_enforceIDs(self): + step_conf = { + _SBE.STEPID: "01_load_molecule", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: "abc:CN(C)CCn1cc(c2ccc(F)cc2)c(n1)n3cccc3;CN(C)CCn1cc(c2ccc(F)cc2)c(n1)n3cccc3", + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_STRING, + _SBE.INPUT_ENFORCE_IDS: { + _SBE.INPUT_ENFORCE_COMPOUND_IDS: ["3", 1], + _SBE.INPUT_ENFORCE_ENUMERATION_IDS: [10, 4], + }, + } + ], + }, + } + init_step = StepInitializeCompound(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 2) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 0) + + self.assertEqual( + init_step.get_compounds()[0][0].get_smile(), + "CN(C)CCn1cc(c2ccc(F)cc2)c(n1)n3cccc3", + ) + self.assertEqual(init_step.get_compounds()[0].get_name(), "abc") + self.assertEqual(init_step.get_compounds()[1].get_name(), "1") + self.assertEqual(init_step.get_compounds()[0].get_compound_number(), 3) + self.assertEqual(init_step.get_compounds()[1][0].get_enumeration_id(), 4) + + def test_initialize_compound_CSV(self): + step_conf = { + _SBE.STEPID: "01_load_molecule", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: self._CSV_path, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_CSV_COLUMNS: { + _SBE.INPUT_CSV_SMILES_COLUMN: "SMILES" + }, + _SBE.INPUT_FORMAT: _SBE.FORMAT_CSV, + } + ] + }, + } + init_step = StepInitializeCompound(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 3) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 0) + + self.assertEqual( + init_step.get_compounds()[0][0].get_smile(), "COc1cc2sc(C)nc2cc1OC" + ) + self.assertEqual(init_step.get_compounds()[1].get_name(), "1") + + def test_initialize_compound_CSV_extended_options(self): + step_conf = { + _SBE.STEPID: "01_load_molecule", + _SBE.STEP_TYPE: _SBE.STEP_INITIALIZATION, + _SBE.INPUT: { + _SBE.INPUT_COMPOUNDS: [ + { + _SBE.INPUT_SOURCE: self._CSV_path_semicolon, + _SBE.INPUT_SOURCE_TYPE: _SBE.INPUT_SOURCE_TYPE_FILE, + _SBE.INPUT_CSV_DELIMITER: ";", + _SBE.INPUT_CSV_COLUMNS: { + _SBE.INPUT_CSV_SMILES_COLUMN: "SMILES", + _SBE.INPUT_CSV_NAMES_COLUMN: "name", + }, + _SBE.INPUT_FORMAT: _SBE.FORMAT_CSV, + } + ] + }, + } + init_step = StepInitializeCompound(**step_conf) + init_step.generate_input() + init_step.execute() + + self.assertEqual(len(init_step.get_compounds()), 3) + self.assertEqual(len(init_step.get_compounds()[0]), 1) + self.assertEqual(len(init_step.get_compounds()[0][0]), 0) + self.assertEqual(len(init_step.get_compounds()[2]), 2) + + self.assertEqual( + 
init_step.get_compounds()[0][0].get_smile(), "COc1cc2sc(C)nc2cc1OC" + ) + self.assertEqual(init_step.get_compounds()[1].get_name(), "mol2_a") diff --git a/tests/panther/__init__.py b/tests/panther/__init__.py new file mode 100644 index 0000000..230e18e --- /dev/null +++ b/tests/panther/__init__.py @@ -0,0 +1 @@ +from tests.panther.test_panther import * diff --git a/tests/panther/test_panther.py b/tests/panther/test_panther.py new file mode 100644 index 0000000..3359fa1 --- /dev/null +++ b/tests/panther/test_panther.py @@ -0,0 +1,48 @@ +from icolos.utils.enums.program_parameters import PantherEnum +import os +import unittest +from tests.tests_paths import PATHS_EXAMPLEDATA, MAIN_CONFIG + +from icolos.utils.enums.step_enums import StepBaseEnum, StepPantherEnum +from icolos.core.workflow_steps.calculation.panther import StepPanther +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SPE = StepPantherEnum() +_PE = PantherEnum() + + +class Test_Panther(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/panther") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def test_panther_run(self): + step_conf = { + _SBE.STEPID: "01_panther", + _SBE.STEP_TYPE: _SBE.STEP_PANTHER, + _SBE.EXEC: {}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: { + _SPE.PANTHER_LOCATION: MAIN_CONFIG["PANTHER_LOCATION"], + _SPE.PANTHER_CONFIG_FILE: attach_root_path( + PATHS_EXAMPLEDATA.PANTHER_CONFIG + ), + _SPE.FIELDS: { + "1-Pdb file": attach_root_path( + PATHS_EXAMPLEDATA.PANTHER_RECEPTOR_PDB + ) + }, + } + }, + } + panther_step = StepPanther(**step_conf) + panther_step.execute() + + # check we get the negative image back + out_path = os.path.join(self._test_dir, "neg_image.mol2") + panther_step.write_generic_by_extension(self._test_dir, "mol2") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 6044) diff --git a/tests/pmx/__init__.py b/tests/pmx/__init__.py new file mode 100644 index 0000000..b2e888a --- /dev/null +++ b/tests/pmx/__init__.py @@ -0,0 +1,11 @@ +from tests.pmx.test_prepare_simulations import * +from tests.pmx.test_analyse import * +from tests.pmx.test_prepare_transitions import * +from tests.pmx.test_atomMapping import * +from tests.pmx.test_ligandHybrid import * +from tests.pmx.test_box_water_ions import * +from tests.pmx.test_setup_workpath import * +from tests.pmx.test_assemble_systems import * + +# from tests.pmx.test_run_simulations import * +# from tests.pmx.test_abfe import * diff --git a/tests/pmx/test_abfe.py b/tests/pmx/test_abfe.py new file mode 100644 index 0000000..3d64edd --- /dev/null +++ b/tests/pmx/test_abfe.py @@ -0,0 +1,62 @@ +import unittest +import os +from icolos.core.containers.generic import GenericData +from icolos.core.workflow_steps.pmx.abfe import StepPMXabfe +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.composite_agents.workflow import WorkFlow +import shutil + +_SBE = StepBaseEnum + + +class Test_PMXabfe(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/abfe") + if os.path.isdir(cls._test_dir): + shutil.rmtree(cls._test_dir) + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + with open(PATHS_EXAMPLEDATA.PMX_ABFE_INPUT_COMPLEX, "r") as f: + data = f.read() + self.protein = 
GenericData(file_name="complex.pdb", file_data=data) + + def test_pmx_abfe(self): + step_conf = { + _SBE.STEPID: "01_PMX_ABFE", + _SBE.STEP_TYPE: _SBE.STEP_PMX_ABFE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: {_SBE.SETTINGS_ARGUMENTS_FLAGS: ["--build"]}, + _SBE.SETTINGS_ADDITIONAL: { + # settings for protein parametrisation + "forcefield": "amber03", + "water": "tip3p", + }, + }, + } + + step_pmx_abfe = StepPMXabfe(**step_conf) + step_pmx_abfe.data.generic.add_file(self.protein) + + step_pmx_abfe.work_dir = self._test_dir + step_pmx_abfe._workflow_object = WorkFlow() + step_pmx_abfe.execute() + + self.assertEqual( + os.path.isfile(os.path.join(self._test_dir, "complex/genion.tpr")), True + ) + + stat_inf = os.stat(os.path.join(self._test_dir, "protein.gro")) + self.assertGreater(stat_inf.st_size, 70000) diff --git a/tests/pmx/test_analyse.py b/tests/pmx/test_analyse.py new file mode 100644 index 0000000..dba50ee --- /dev/null +++ b/tests/pmx/test_analyse.py @@ -0,0 +1,65 @@ +import unittest +import os +from icolos.core.workflow_steps.pmx.run_analysis import StepPMXRunAnalysis +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + create_test_dir, + MAIN_CONFIG, + export_unit_test_env_vars, + get_ligands_as_compounds_with_conformers, +) +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.core.containers.perturbation_map import PerturbationMap + +_SBE = StepBaseEnum + + +class Test_PMXanalyse(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/analyse") + # + create_test_dir(PATHS_EXAMPLEDATA.RUN_ANALYSIS_TEST_DIR, cls._test_dir) + + def setUp(self): + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + p_map = PerturbationMap(compounds=self.compounds) + p_map.parse_map_file(file_path=PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG_SINGLE_EDGE) + self.p_map = p_map + export_unit_test_env_vars() + + def test_pmx_analyse(self): + step_conf = { + _SBE.STEPID: "prepare_simulations", + _SBE.STEP_TYPE: "pmx_analyse", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a", + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["PMX"]["CLI_ENTRYPOINT"], + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + } + step_pmx_analyse = StepPMXRunAnalysis(**step_conf) + step_pmx_analyse.work_dir = self._test_dir + step_pmx_analyse._workflow_object = WorkFlow() + step_pmx_analyse._workflow_object.workflow_data.perturbation_map = self.p_map + step_pmx_analyse.execute() + + stat_inf = os.stat( + os.path.join(self._test_dir, "0cd4b47_4f2ffa1/protein/analyse1/results.txt") + ) + + self.assertGreater(stat_inf.st_size, 19000) + + stat_inf = os.stat(os.path.join(self._test_dir, "resultsAll.csv")) + + self.assertGreater(stat_inf.st_size, 480) diff --git a/tests/pmx/test_assemble_systems.py b/tests/pmx/test_assemble_systems.py new file mode 100644 index 0000000..a424315 --- /dev/null +++ b/tests/pmx/test_assemble_systems.py @@ -0,0 +1,77 @@ +import unittest +import os +from 
icolos.core.workflow_steps.pmx.assemble_systems import StepPMXAssembleSystems +from icolos.core.containers.generic import GenericData +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + MAIN_CONFIG, + create_test_dir, + export_unit_test_env_vars, + get_ligands_as_compounds_with_conformers, +) +from icolos.core.containers.perturbation_map import PerturbationMap +from icolos.utils.general.files_paths import attach_root_path +from icolos.utils.enums.program_parameters import PMXEnum, PMXAtomMappingEnum + +_SBE = StepBaseEnum +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class Test_PMXAssembleSystems(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/test_assemble_systems") + + create_test_dir(PATHS_EXAMPLEDATA.ASSEMBLE_SYSTEMS_TEST_DIR, cls._test_dir) + + def setUp(self): + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + p_map = PerturbationMap(compounds=self.compounds) + p_map.parse_map_file(file_path=PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG_SINGLE_EDGE) + self.p_map = p_map + with open(PATHS_EXAMPLEDATA.FEP_PLUS_OTHER_PROTEIN, "r") as f: + data = f.read() + self.protein = GenericData(file_name="protein.pdb", file_data=data) + + export_unit_test_env_vars() + + def test_assembleSystems(self): + + step_conf = { + _SBE.STEPID: "assemble_systems", + _SBE.STEP_TYPE: "pmx_assemble_systems", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["PMX"]["CLI_ENTRYPOINT"], + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + } + + step_assembleSystems = StepPMXAssembleSystems(**step_conf) + step_assembleSystems.work_dir = self._test_dir + step_assembleSystems._workflow_object = WorkFlow() + step_assembleSystems._workflow_object.workflow_data.perturbation_map = ( + self.p_map + ) + step_assembleSystems.data.generic.add_file(self.protein) + step_assembleSystems.execute() + + stat_inf = os.stat( + os.path.join(self._test_dir, "0cd4b47_4f2ffa1/hybridStrTop/ffmerged.itp") + ) + self.assertEqual(stat_inf.st_size, 1695) diff --git a/tests/pmx/test_atomMapping.py b/tests/pmx/test_atomMapping.py new file mode 100644 index 0000000..2ea88ae --- /dev/null +++ b/tests/pmx/test_atomMapping.py @@ -0,0 +1,67 @@ +import unittest +import os +from icolos.core.workflow_steps.pmx.atomMapping import StepPMXatomMapping +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + create_test_dir, + export_unit_test_env_vars, + get_ligands_as_compounds_with_conformers, +) +from icolos.core.containers.perturbation_map import PerturbationMap +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.general.files_paths import attach_root_path +from icolos.utils.enums.program_parameters import PMXEnum, PMXAtomMappingEnum + +_SBE = StepBaseEnum +_PE = PMXEnum() +_PAE = PMXAtomMappingEnum() + + +class Test_PMXatomMapping(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/test_atomMapping") + 
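+        # create_test_dir presumably seeds the junk dir with the pre-built
+        # atomMapping fixtures the step expects to find in its work_dir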
create_test_dir(PATHS_EXAMPLEDATA.ATOM_MAPPING_TEST_DIR, cls._test_dir) + + def setUp(self): + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + p_map = PerturbationMap(compounds=self.compounds) + p_map.parse_map_file(file_path=PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG_SINGLE_EDGE) + self.p_map = p_map + + export_unit_test_env_vars() + + def test_atomMapping(self): + + step_conf = { + _SBE.STEPID: "atommapping", + _SBE.STEP_TYPE: "pmx_atommapping", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + } + + step_atom_mapping = StepPMXatomMapping(**step_conf) + step_atom_mapping.work_dir = self._test_dir + step_atom_mapping._workflow_object = WorkFlow() + step_atom_mapping._workflow_object.workflow_data.perturbation_map = self.p_map + step_atom_mapping.execute() + + stat_inf = os.stat( + os.path.join(self._test_dir, "0cd4b47_4f2ffa1/hybridStrTop/out_pdb1.pdb") + ) + self.assertEqual(stat_inf.st_size, 4631) diff --git a/tests/pmx/test_box_water_ions.py b/tests/pmx/test_box_water_ions.py new file mode 100644 index 0000000..fa9199d --- /dev/null +++ b/tests/pmx/test_box_water_ions.py @@ -0,0 +1,70 @@ +import unittest +import os +from icolos.core.workflow_steps.pmx.box_water_ions import StepPMXBoxWaterIons +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + create_test_dir, + export_unit_test_env_vars, + get_ligands_as_compounds_with_conformers, + MAIN_CONFIG, +) +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.containers.perturbation_map import PerturbationMap + +_SBE = StepBaseEnum + + +class Test_PMXBoxWaterIons(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/test_box_water_ions") + + create_test_dir(PATHS_EXAMPLEDATA.BOX_WATER_IONS_TEST_DIR, cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + # initialise the map object for the two test ligands + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + p_map = PerturbationMap(compounds=self.compounds) + p_map.parse_map_file(file_path=PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG_SINGLE_EDGE) + self.p_map = p_map + + # def tearDown(self): + # shutil.rmtree(self._test_dir) + + def test_box_water_ions(self): + conf = { + _SBE.STEPID: "01_PMX_BOX_WATER_IONS", + _SBE.STEP_TYPE: _SBE.STEP_PMX_BOX_WATER_IONS, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["PMX"]["CLI_ENTRYPOINT"], + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: {}, + }, + } + step = StepPMXBoxWaterIons(**conf) + step.data.compounds = self.compounds + step.work_dir = self._test_dir + step._workflow_object = WorkFlow() + step._workflow_object.workflow_data.perturbation_map = self.p_map + step.execute() + stat_inf = os.stat( + os.path.join(self._test_dir, "0cd4b47_4f2ffa1/water/tpr.tpr") + ) + 
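+        # the water-leg tpr checked here and the much larger protein-leg tpr
+        # checked below confirm both legs of the edge were boxed and solvated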
self.assertGreater(stat_inf.st_size, 212100) + + stat_inf = os.stat( + os.path.join(self._test_dir, "0cd4b47_4f2ffa1/protein/tpr.tpr") + ) + self.assertGreater(stat_inf.st_size, 3505650) diff --git a/tests/pmx/test_doublebox.py b/tests/pmx/test_doublebox.py new file mode 100644 index 0000000..c969296 --- /dev/null +++ b/tests/pmx/test_doublebox.py @@ -0,0 +1,20 @@ +import unittest +import os +from tests.tests_paths import export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + + +class Test_PMXdoublebox(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/doublebox") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + pass + + def test_XYZ(self): + pass diff --git a/tests/pmx/test_genlib.py b/tests/pmx/test_genlib.py new file mode 100644 index 0000000..a8185bb --- /dev/null +++ b/tests/pmx/test_genlib.py @@ -0,0 +1,20 @@ +import unittest +import os +from tests.tests_paths import export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + + +class Test_PMXgenlib(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/genlib") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + pass + + def test_XYZ(self): + pass diff --git a/tests/pmx/test_gentop.py b/tests/pmx/test_gentop.py new file mode 100644 index 0000000..0384648 --- /dev/null +++ b/tests/pmx/test_gentop.py @@ -0,0 +1,20 @@ +import unittest +import os +from tests.tests_paths import export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + + +class Test_PMXgentop(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/gentop") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + pass + + def test_XYZ(self): + pass diff --git a/tests/pmx/test_ligandHybrid.py b/tests/pmx/test_ligandHybrid.py new file mode 100644 index 0000000..815b20c --- /dev/null +++ b/tests/pmx/test_ligandHybrid.py @@ -0,0 +1,70 @@ +import unittest +import os +from icolos.core.workflow_steps.pmx.ligandHybrid import StepPMXligandHybrid +from icolos.core.containers.perturbation_map import PerturbationMap +from icolos.utils.enums.program_parameters import PMXEnum, PMXLigandHybridEnum +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + create_test_dir, + get_ligands_as_compounds_with_conformers, + export_unit_test_env_vars, +) +from icolos.utils.general.files_paths import attach_root_path + + +_SBE = StepBaseEnum +_PE = PMXEnum() +_PHE = PMXLigandHybridEnum() + + +class Test_PMXligandHybrid(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/test_ligandHybrid") + # if not os.path.isdir(cls._test_dir): + # os.makedirs(cls._test_dir) + create_test_dir(PATHS_EXAMPLEDATA.LIGAND_HYBRID_TEST_DIR, cls._test_dir) + + def setUp(self): + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + p_map = PerturbationMap(compounds=self.compounds) + p_map.parse_map_file(file_path=PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG_SINGLE_EDGE) + self.p_map = p_map + + export_unit_test_env_vars() + + # def tearDown(self): 
+ # shutil.rmtree(self._test_dir) + + def test_build_hybrid_topology_and_structure(self): + merged_itp_path = os.path.join( + self._test_dir, "0cd4b47_4f2ffa1/hybridStrTop/merged.itp" + ) + + step_conf = { + _SBE.STEPID: "ligand_hybrid", + _SBE.STEP_TYPE: "pmx_ligandHybrid", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + } + + step_ligand_hybrid = StepPMXligandHybrid(**step_conf) + step_ligand_hybrid.work_dir = self._test_dir + step_ligand_hybrid._workflow_object = WorkFlow() + step_ligand_hybrid._workflow_object.workflow_data.perturbation_map = self.p_map + step_ligand_hybrid.execute() + + stat_inf = os.stat(merged_itp_path) + self.assertEqual(stat_inf.st_size, 39468) diff --git a/tests/pmx/test_mutate.py b/tests/pmx/test_mutate.py new file mode 100644 index 0000000..3c77e55 --- /dev/null +++ b/tests/pmx/test_mutate.py @@ -0,0 +1,20 @@ +import unittest +import os +from tests.tests_paths import export_unit_test_env_vars +from icolos.utils.general.files_paths import attach_root_path + + +class Test_PMXmutate(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/mutate") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + pass + + def test_XYZ(self): + pass diff --git a/tests/pmx/test_prepare_simulations.py b/tests/pmx/test_prepare_simulations.py new file mode 100644 index 0000000..9ab1ef0 --- /dev/null +++ b/tests/pmx/test_prepare_simulations.py @@ -0,0 +1,73 @@ +import unittest +import os +from icolos.core.workflow_steps.pmx.prepare_simulations import StepPMXPrepareSimulations +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.core.composite_agents.workflow import WorkFlow +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + MAIN_CONFIG, + export_unit_test_env_vars, + create_test_dir, + get_ligands_as_compounds_with_conformers, +) +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.containers.perturbation_map import PerturbationMap + +_SBE = StepBaseEnum + + +class Test_PMXPrepareSimulations(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/test_prepare_simulations") + + create_test_dir(PATHS_EXAMPLEDATA.PREPARE_SIMULATIONS_TEST_DIR, cls._test_dir) + + def setUp(self): + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + p_map = PerturbationMap(compounds=self.compounds) + p_map.parse_map_file(file_path=PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG_SINGLE_EDGE) + self.p_map = p_map + + export_unit_test_env_vars() + + def test_prepare_simulations(self): + + step_conf = { + _SBE.STEPID: "prepare_simulations", + _SBE.STEP_TYPE: "pmx_prepare_simulations", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["PMX"]["CLI_ENTRYPOINT"], + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {"sim_type": "em"}, + }, + } + + step_prepare_simulations = StepPMXPrepareSimulations(**step_conf) + step_prepare_simulations.work_dir = self._test_dir + step_prepare_simulations._workflow_object = 
WorkFlow() + step_prepare_simulations._workflow_object.workflow_data.perturbation_map = ( + self.p_map + ) + step_prepare_simulations.execute() + + stat_inf = os.stat( + os.path.join(self._test_dir, "0cd4b47_4f2ffa1/water/stateA/run1/em/tpr.tpr") + ) + + self.assertGreater(stat_inf.st_size, 213300) + + stat_inf = os.stat( + os.path.join( + self._test_dir, "0cd4b47_4f2ffa1/protein/stateB/run3/em/tpr.tpr" + ) + ) + self.assertGreater(stat_inf.st_size, 3501000) diff --git a/tests/pmx/test_prepare_transitions.py b/tests/pmx/test_prepare_transitions.py new file mode 100644 index 0000000..9d4d13f --- /dev/null +++ b/tests/pmx/test_prepare_transitions.py @@ -0,0 +1,95 @@ +import unittest +import os +from icolos.core.workflow_steps.pmx.prepare_transitions import StepPMXPrepareTransitions +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + create_test_dir, + MAIN_CONFIG, + export_unit_test_env_vars, + get_ligands_as_compounds_with_conformers, +) +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.containers.perturbation_map import PerturbationMap + +_SBE = StepBaseEnum + + +class Test_PMXPrepareTransitions(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/prepare_transitions") + create_test_dir(PATHS_EXAMPLEDATA.PREPARE_TRANSITIONS_TEST_DIR, cls._test_dir) + export_unit_test_env_vars() + + def setUp(self): + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + p_map = PerturbationMap(compounds=self.compounds) + p_map.parse_map_file( + file_path=PATHS_EXAMPLEDATA.PMX_FEP_MAP_LOG_PREPARE_TRANSITIONS + ) + p_map.replicas = 1 + self.p_map = p_map + + def test_pmx_prepare_transitions(self): + + step_conf = { + _SBE.STEPID: "prepare_transitions", + _SBE.STEP_TYPE: "pmx_prepare_transitions", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["PMX"]["CLI_ENTRYPOINT"], + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {"sim_type": "transitions"}, + }, + } + + step_prep_trans = StepPMXPrepareTransitions(**step_conf) + step_prep_trans.work_dir = self._test_dir + step_prep_trans._workflow_object = WorkFlow() + step_prep_trans._workflow_object.workflow_data.perturbation_map = self.p_map + step_prep_trans.execute() + + stat_inf = os.stat( + os.path.join( + self._test_dir, + "4f2ffa1_bd688d5/protein/stateA/run1/transitions/frame1.gro", + ) + ) + self.assertGreater(stat_inf.st_size, 6159200) + + stat_inf = os.stat( + os.path.join( + self._test_dir, + "4f2ffa1_bd688d5/protein/stateB/run1/transitions/frame1.gro", + ) + ) + self.assertGreater(stat_inf.st_size, 6159200) + + stat_inf = os.stat( + os.path.join( + self._test_dir, + "4f2ffa1_bd688d5/water/stateA/run1/transitions/frame1.gro", + ) + ) + self.assertGreater(stat_inf.st_size, 887000) + + stat_inf = os.stat( + os.path.join( + self._test_dir, + "4f2ffa1_bd688d5/water/stateB/run1/transitions/frame1.gro", + ) + ) + self.assertGreater(stat_inf.st_size, 887000) diff --git a/tests/pmx/test_run_simulations.py b/tests/pmx/test_run_simulations.py new file mode 100644 index
0000000..48d6bde --- /dev/null +++ b/tests/pmx/test_run_simulations.py @@ -0,0 +1,113 @@ +import unittest +import os +from icolos.core.containers.generic import GenericData +from icolos.core.workflow_steps.pmx.run_simulations import StepPMXRunSimulations +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + create_test_dir, + MAIN_CONFIG, + export_unit_test_env_vars, + get_ligands_as_compounds_with_conformers, +) +from icolos.utils.general.files_paths import attach_root_path +from icolos.core.containers.perturbation_map import PerturbationMap +from icolos.core.composite_agents.workflow import WorkFlow + +_SBE = StepBaseEnum + + +class Test_PMXRunSimulations(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/run_simulations_test_dir") + create_test_dir(PATHS_EXAMPLEDATA.RUN_SIMULATIONS_TEST_DIR, cls._test_dir) + + def setUp(self): + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + with open(PATHS_EXAMPLEDATA.FEP_PLUS_OTHER_PROTEIN, "r") as f: + data = f.read() + self.protein = GenericData(file_name="protein.pdb", file_data=data) + p_map = PerturbationMap(compounds=self.compounds, protein=self.protein) + p_map.parse_map_file(file_path=PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG_MIN) + self.p_map = p_map + export_unit_test_env_vars() + + def test_run_simulations(self): + step_conf = { + _SBE.STEPID: "run_simulations", + _SBE.STEP_TYPE: "pmx_run_simulations", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2021-fosscuda-2019a-PLUMED-2.7.1-Python-3.7.2", + _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["PMX"]["CLI_ENTRYPOINT"], + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {"sim_type": "em"}, + }, + } + + step_run_simulations = StepPMXRunSimulations(**step_conf) + step_run_simulations.work_dir = self._test_dir + step_run_simulations._workflow_object = WorkFlow() + step_run_simulations.get_workflow_object().workflow_data.perturbation_map = ( + self.p_map + ) + step_run_simulations.execute() + + stat_inf = os.stat( + os.path.join(self._test_dir, "0cd4b47_4f2ffa1/water/stateB/run3/em/md.log") + ) + + self.assertEqual(stat_inf.st_size, 1347767) + + stat_inf = os.stat( + os.path.join( + self._test_dir, "0cd4b47_4f2ffa1/protein/stateB/run3/em/tpr.tpr" + ) + ) + self.assertEqual(stat_inf.st_size, 3501084) + + # def test_run_simulations_parallel(self): + # step_conf = { + # _SBE.STEPID: "run_simulations", + # _SBE.STEP_TYPE: "pmx_run_simulations", + # _SBE.EXEC: { + # _SBE.EXEC_PREFIXEXECUTION: "module load GROMACS/2020.3-fosscuda-2019a", + # _SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["PMX"]["CLI_ENTRYPOINT"], + # _SBE.EXEC_PARALLELIZATION: { + # _SBE.EXEC_PARALLELIZATION_CORES: 2 + # } + # }, + # _SBE.SETTINGS: { + # _SBE.SETTINGS_ARGUMENTS: { + # _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + # _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + # }, + # _SBE.SETTINGS_ADDITIONAL: {"sim_type": "em"}, + # }, + # } + + # step_run_simulations = StepPMXRunSimulations(**step_conf) + # step_run_simulations.work_dir = self._test_dir + # step_run_simulations._workflow_object = WorkFlow() + # step_run_simulations.get_workflow_object().perturbation_map = self.p_map + # step_run_simulations.execute() + + # stat_inf = os.stat( + # os.path.join(self._test_dir, "0cd4b47_4f2ffa1/water/stateB/run3/em/md.log") + # ) + + # 
self.assertEqual(stat_inf.st_size, 1347767) + + # stat_inf = os.stat( + # os.path.join( + # self._test_dir, "0cd4b47_4f2ffa1/protein/stateB/run3/em/tpr.tpr" + # ) + # ) + # self.assertEqual(stat_inf.st_size, 3501084) diff --git a/tests/pmx/test_setup_workpath.py b/tests/pmx/test_setup_workpath.py new file mode 100644 index 0000000..2d9c96d --- /dev/null +++ b/tests/pmx/test_setup_workpath.py @@ -0,0 +1,80 @@ +import unittest +import os +from icolos.core.containers.generic import GenericData +from icolos.core.workflow_steps.pmx.setup_workpath import StepPMXSetup +from icolos.utils.enums.step_enums import StepBaseEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + export_unit_test_env_vars, + get_ligands_as_compounds_with_conformers, +) +from icolos.utils.general.files_paths import attach_root_path +import shutil +from icolos.core.composite_agents.workflow import WorkFlow + +_SBE = StepBaseEnum + + +class Test_PMX_setup(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/pmx/test_setupWorkpath") + if os.path.exists(cls._test_dir): + shutil.rmtree(cls._test_dir) + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + export_unit_test_env_vars() + + def setUp(self): + self.compounds = get_ligands_as_compounds_with_conformers( + PATHS_EXAMPLEDATA.FEP_PLUS_LIGANDS + ) + with open(PATHS_EXAMPLEDATA.FEP_PLUS_OTHER_PROTEIN, "r") as f: + data = f.read() + self.protein = GenericData(file_name="protein.pdb", file_data=data) + with open(PATHS_EXAMPLEDATA.FEP_PLUS_MAP_LOG, "r") as f: + data = f.read() + self.log_file = GenericData( + file_name="map.log", file_data=data, extension="log" + ) + + def test_setup_workpath(self): + step_conf = { + _SBE.STEPID: "01_PMX_SETUP", + _SBE.STEP_TYPE: _SBE.STEP_PMX_SETUP, + _SBE.EXEC: { + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + } + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: { + # settings for protein parametrisation + "forcefield": "amber03", + "water": "tip3p", + }, + }, + } + + step_setup = StepPMXSetup(**step_conf) + step_setup.data.compounds = self.compounds + step_setup.data.generic.add_file(self.protein) + step_setup.data.generic.add_file(self.log_file) + step_setup.data.generic.add_file( + GenericData( + file_name="mdp_files", + extension="mdp", + file_data=PATHS_EXAMPLEDATA.PMX_MDP_FILES, + ) + ) + step_setup.work_dir = self._test_dir + step_setup._workflow_object = WorkFlow() + step_setup.execute() + + assert os.path.isdir(os.path.join(self._test_dir, "input")) + assert os.path.isdir( + os.path.join(self._test_dir, "0cd4b47_4f2ffa1/water/stateA/run1/em") + ) + # stat some of the ligand files and check they've been deposited in the right directory diff --git a/tests/prediction/__init__.py b/tests/prediction/__init__.py new file mode 100644 index 0000000..84e4ecc --- /dev/null +++ b/tests/prediction/__init__.py @@ -0,0 +1,3 @@ +from tests.prediction.test_predictor import * +from tests.prediction.test_model_building import * +from tests.prediction.test_active_learning import * diff --git a/tests/prediction/test_active_learning.py b/tests/prediction/test_active_learning.py new file mode 100644 index 0000000..3ff5796 --- /dev/null +++ b/tests/prediction/test_active_learning.py @@ -0,0 +1,100 @@ +import unittest +from icolos.core.workflow_steps.prediction.active_learning import StepActiveLearning +from icolos.utils.enums.program_parameters import GlideEnum +import os +from icolos.utils.enums.step_enums 
import ( + StepActiveLearningEnum, + StepBaseEnum, + StepGlideEnum, +) +from icolos.utils.general.files_paths import attach_root_path +from tests.tests_paths import PATHS_1UYD, PATHS_EXAMPLEDATA + +_SBE = StepBaseEnum +_EE = GlideEnum() +_SGE = StepGlideEnum() +_SALE = StepActiveLearningEnum() + + +class TestActiveLearning(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/active_learning") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + self.ligands = PATHS_1UYD.LIGANDS + self.receptor_path = PATHS_1UYD.GRID_PATH + self.receptor_constraints_path = PATHS_1UYD.GRID_CONSTRAINTS_PATH + self.receptor_path_COX2 = PATHS_EXAMPLEDATA.PRIME_COX2_GRID + + @classmethod + def tearDownClass(cls): + pass + + def test_active_learning_docking(self): + step_conf = { + _SBE.STEPID: "01_active_learning", + _SBE.STEP_TYPE: _SBE.STEP_ACTIVE_LEARNING, + _SBE.EXEC: {}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: { + _SALE.VIRTUAL_LIB: self.ligands, + _SALE.N_ROUNDS: "2", + _SALE.INIT_SAMPLES: "2", + _SALE.BATCH_SIZE: "4", + _SALE.CRITERIA: _SGE.GLIDE_DOCKING_SCORE, + # config for embedding + docking + _SALE.ORACLE_CONFIG: [ + { + _SBE.STEPID: "01_glide", + _SBE.STEP_TYPE: _SBE.STEP_GLIDE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 8, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + _SBE.EXEC_FAILUREPOLICY: { + _SBE.EXEC_FAILUREPOLICY_NTRIES: 1 + }, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _EE.GLIDE_HOST: "cpu-only" + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _EE.GLIDE_AMIDE_MODE: "trans", + _EE.GLIDE_EXPANDED_SAMPLING: "True", + _EE.GLIDE_GRIDFILE: [self.receptor_path], + _EE.GLIDE_NENHANCED_SAMPLING: "1", + _EE.GLIDE_POSE_OUTTYPE: _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB, + _EE.GLIDE_POSES_PER_LIG: "3", + _EE.GLIDE_POSTDOCK_NPOSE: "25", + _EE.GLIDE_POSTDOCKSTRAIN: "True", + _EE.GLIDE_PRECISION: "SP", + _EE.GLIDE_REWARD_INTRA_HBONDS: "True", + } + }, + }, + }, + ], + }, + }, + } + + step_active_learning = StepActiveLearning(**step_conf) + step_active_learning.execute() + out_path = os.path.join(self._test_dir, "production_model.pkl") + data = step_active_learning.data.generic.get_files_by_extension(ext="pkl")[ + 0 + ].get_data() + with open(out_path, "wb") as f: + f.write(data) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 348000) diff --git a/tests/prediction/test_model_building.py b/tests/prediction/test_model_building.py new file mode 100644 index 0000000..651583a --- /dev/null +++ b/tests/prediction/test_model_building.py @@ -0,0 +1,91 @@ +import json +import unittest +import os + +from icolos.core.workflow_steps.prediction.model_building import StepModelBuilder +from icolos.utils.enums.program_parameters import ModelBuilderEnum + +from icolos.utils.enums.step_enums import StepBaseEnum, StepModelBuilderEnum + +from tests.tests_paths import PATHS_EXAMPLEDATA, load_SDF_docked, MAIN_CONFIG +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SME = ModelBuilderEnum() +_SMBE = StepModelBuilderEnum() + + +class Test_Model_Building(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/model_building") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + 
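# load the example Optuna_AZ build configuration (JSON) and the docked compounds used as training input +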
self._example_JSON = PATHS_EXAMPLEDATA.MODEL_BUILDER_EXAMPLE_JSON + self._compounds = load_SDF_docked( + PATHS_EXAMPLEDATA.MODEL_BUILDER_TEST_INPUT_SDF + ) + + @classmethod + def tearDownClass(cls): + pass + + def test_build_model(self): + step_conf = { + _SBE.STEPID: "01_model_building", + _SBE.STEP_TYPE: _SBE.STEP_PREDICTION, + _SBE.EXEC: { + _SBE.EXEC_BINARYLOCATION: " ".join( + [ + MAIN_CONFIG["OPTUNA_AZ"]["ENVIRONMENT_PYTHON"], + MAIN_CONFIG["OPTUNA_AZ"]["ENTRY_POINT_LOCATION"], + ] + ) + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _SME.CONFIG: self._example_JSON, + _SME.BEST_BUILDCONFIG_OUTPATH: os.path.join( + self._test_dir, "buildconfig.json" + ), + _SME.BEST_MODEL_OUTPATH: os.path.join( + self._test_dir, "best_model_trial.pkl" + ), + _SME.MERGED_MODEL_OUTPATH: os.path.join( + self._test_dir, "production_model.pkl" + ), + } + }, + _SBE.SETTINGS_ADDITIONAL: { + _SMBE.DATA: { + _SMBE.DATA_INPUT_COLUMN: "original_smiles", + _SMBE.DATA_RESPONSE_COLUMN: _SBE.ANNOTATION_TAG_DOCKING_SCORE, + } + }, + }, + } + model_step = StepModelBuilder(**step_conf) + model_step.data.compounds = self._compounds + + model_step.execute() + + # check, that the input data has been written as expected + out_path = os.path.join(self._test_dir, "best_param.json") + container = model_step.data.generic.get_files_by_extension(ext="json")[0] + with open(out_path, "w") as f: + json.dump(container.get_data(), f, indent=4) + stat_inf = os.stat(out_path) + self.assertEqual(_SMBE.TMP_OUTPUT_BEST_PARAMETERS, container.get_file_name()) + self.assertGreater(stat_inf.st_size, 800) + + # check, that a model has been produced + # note, that the model's size strongly depends on the underlying algorithm / hyper-parameters chosen + out_path = os.path.join(self._test_dir, "production_model.pkl") + data = model_step.data.generic.get_files_by_extension(ext="pkl")[0].get_data() + with open(out_path, "wb") as f: + f.write(data) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 5000) diff --git a/tests/prediction/test_predictor.py b/tests/prediction/test_predictor.py new file mode 100644 index 0000000..096fd97 --- /dev/null +++ b/tests/prediction/test_predictor.py @@ -0,0 +1,68 @@ +import unittest +import os + +from icolos.core.containers.compound import Compound, Enumeration +from icolos.core.workflow_steps.prediction.predictor import StepPredictor + +from icolos.utils.enums.step_enums import StepBaseEnum, StepPredictorEnum + +from tests.tests_paths import PATHS_EXAMPLEDATA, get_mol_as_Conformer +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SPE = StepPredictorEnum() + + +class Test_Predictor(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/Prediction") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + self._example_model_path = attach_root_path(PATHS_EXAMPLEDATA.EPSA_MODEL_PATH) + self._example_mol_path = attach_root_path( + PATHS_EXAMPLEDATA.EPSA_EXAMPLE_MOLECULE + ) + + @classmethod + def tearDownClass(cls): + pass + + def test_predict_ePSA_with_descriptors(self): + step_conf = { + _SBE.STEPID: "01_predict_ePSA", + _SBE.STEP_TYPE: _SBE.STEP_PREDICTION, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: {_SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}}, + _SBE.SETTINGS_ADDITIONAL: { + _SPE.MODEL_PATH: self._example_model_path, + _SPE.FEATURES: [ + "bf_weighted_volume_boltzfactor_dmso", + "bf_weighted_area_boltzfactor_dmso", + 
"bf_weighted_HB_acc_boltzfactor_dmso", + "bf_weighted_HB_don_boltzfactor_dmso", + "bf_weighted_sigma2_boltzfactor_dmso", + "bf_weighted_Gsolv_meoh_boltzfactor_dmso", + ], + _SPE.NAME_PREDICTED: "pred_ePSA", + }, + }, + } + pred_step = StepPredictor(**step_conf) + pred_step.get_compounds().append(Compound()) + pred_step.get_compounds()[0].add_enumeration(Enumeration(), auto_update=True) + conformer = get_mol_as_Conformer(self._example_mol_path) + pred_step.data.compounds[0][0].add_conformers(conformer, auto_update=True) + pred_step.execute() + + self.assertEqual(len(pred_step.get_compounds()), 1) + self.assertEqual(len(pred_step.get_compounds()[0]), 1) + self.assertEqual(len(pred_step.get_compounds()[0][0]), 1) + + # check SDF write-out (including ePSA prediction as tag) + out_path = os.path.join(self._test_dir, "ePSA_predicted_annotated.sdf") + pred_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 4448) diff --git a/tests/rms_filter/__init__.py b/tests/rms_filter/__init__.py new file mode 100644 index 0000000..484bdf5 --- /dev/null +++ b/tests/rms_filter/__init__.py @@ -0,0 +1 @@ +from tests.rms_filter.test_rmsfilter import * diff --git a/tests/rms_filter/test_rmsfilter.py b/tests/rms_filter/test_rmsfilter.py new file mode 100644 index 0000000..7af7034 --- /dev/null +++ b/tests/rms_filter/test_rmsfilter.py @@ -0,0 +1,203 @@ +import unittest + +from icolos.core.containers.compound import Compound, Enumeration +from icolos.core.workflow_steps.calculation.rms_filter import StepRMSFilter +from icolos.utils.enums.step_enums import StepBaseEnum, StepRMSFilterEnum +from tests.tests_paths import PATHS_EXAMPLEDATA, get_mol_as_Conformer + +_SBE = StepBaseEnum +_SRF = StepRMSFilterEnum() + + +class Test_RMSfilter(unittest.TestCase): + @classmethod + def setUpClass(cls): + pass + + def setUp(self): + pass + + @classmethod + def tearDownClass(cls): + pass + + def test_RMSfiltering_alignmol_descending(self): + step_conf = { + _SBE.STEPID: "01_RMSfiltering", + _SBE.STEP_TYPE: _SBE.STEP_RMSFILTER, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SRF.METHOD: _SRF.METHOD_ALIGNMOL, + _SRF.THRESHOLD: 1, + _SRF.ORDER_BY: "E_cosmo", + _SRF.ORDER_ASCENDING: False, + }, + }, + } + + rf_step = StepRMSFilter(**step_conf) + rf_step.get_compounds().append(Compound(compound_number=1)) + rf_step.get_compounds()[0].add_enumeration(Enumeration(), auto_update=True) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + rf_step.data.compounds[0][0].add_conformers(conformers, auto_update=True) + + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 11) + rf_step.execute() + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 7) + + self.assertListEqual( + list( + float( + rf_step.get_compounds()[0][0] + .get_conformers()[i] + .get_molecule() + .GetProp("E_cosmo") + ) + for i in range(7) + ), + [ + -943306.7731, + -943304.5548, + -943301.0009, + -943300.9934, + -943303.7802, + -943304.0485, + -943304.0517, + ], + ) + + step_conf[_SBE.SETTINGS][_SBE.SETTINGS_ADDITIONAL][_SRF.THRESHOLD] = 1.5 + rf_step = StepRMSFilter(**step_conf) + rf_step.get_compounds().append(Compound(compound_number=1)) + rf_step.get_compounds()[0].add_enumeration(Enumeration(), auto_update=True) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + rf_step.data.compounds[0][0].add_conformers(conformers, 
auto_update=True) + + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 11) + rf_step.execute() + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 3) + + def test_RMSfiltering_alignmol_ascending(self): + step_conf = { + _SBE.STEPID: "01_RMSfiltering", + _SBE.STEP_TYPE: _SBE.STEP_RMSFILTER, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SRF.METHOD: _SRF.METHOD_ALIGNMOL, + _SRF.THRESHOLD: 1, + _SRF.ORDER_BY: "E_cosmo", + _SRF.ORDER_ASCENDING: True, + }, + }, + } + + rf_step = StepRMSFilter(**step_conf) + rf_step.get_compounds().append(Compound(compound_number=1)) + rf_step.get_compounds()[0].add_enumeration(Enumeration(), auto_update=True) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + rf_step.data.compounds[0][0].add_conformers(conformers, auto_update=True) + + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 11) + rf_step.execute() + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 5) + + self.assertListEqual( + list( + float( + rf_step.get_compounds()[0][0] + .get_conformers()[i] + .get_molecule() + .GetProp("E_cosmo") + ) + for i in range(5) + ), + [-943304.5487, -943300.2823, -943303.7733, -943304.0485, -943304.0517], + ) + + def test_RMSfiltering_best(self): + step_conf = { + _SBE.STEPID: "01_RMSfiltering", + _SBE.STEP_TYPE: _SBE.STEP_RMSFILTER, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SRF.METHOD: _SRF.METHOD_BEST, + _SRF.THRESHOLD: 1, + _SRF.ORDER_BY: "E_cosmo", + _SRF.ORDER_ASCENDING: False, + }, + }, + } + + rf_step = StepRMSFilter(**step_conf) + rf_step.get_compounds().append(Compound(compound_number=1)) + rf_step.get_compounds()[0].add_enumeration(Enumeration(), auto_update=True) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + rf_step.data.compounds[0][0].add_conformers(conformers, auto_update=True) + + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 11) + rf_step.execute() + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 4) + + self.assertListEqual( + list( + float( + rf_step.get_compounds()[0][0] + .get_conformers()[i] + .get_molecule() + .GetProp("E_cosmo") + ) + for i in range(4) + ), + [-943306.7731, -943304.5548, -943301.0009, -943304.0517], + ) + + def test_RMSfiltering_best_notordered(self): + step_conf = { + _SBE.STEPID: "01_RMSfiltering", + _SBE.STEP_TYPE: _SBE.STEP_RMSFILTER, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SRF.METHOD: _SRF.METHOD_BEST, + _SRF.THRESHOLD: 1, + }, + }, + } + + rf_step = StepRMSFilter(**step_conf) + rf_step.get_compounds().append(Compound(compound_number=1)) + rf_step.get_compounds()[0].add_enumeration(Enumeration(), auto_update=True) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + rf_step.data.compounds[0][0].add_conformers(conformers, auto_update=True) + + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 11) + rf_step.execute() + self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 4) + + self.assertListEqual( + list( + float( + rf_step.get_compounds()[0][0] + .get_conformers()[i] + .get_molecule() + .GetProp("E_cosmo") + ) + for i in 
range(4) + ), + [-943306.7731, -943304.5487, -943301.0009, -943304.0485], + ) diff --git a/tests/rmsd/__init__.py b/tests/rmsd/__init__.py new file mode 100644 index 0000000..96049d9 --- /dev/null +++ b/tests/rmsd/__init__.py @@ -0,0 +1 @@ +from tests.rmsd.test_rmsd import Test_RMSD diff --git a/tests/rmsd/test_rmsd.py b/tests/rmsd/test_rmsd.py new file mode 100644 index 0000000..f11d2db --- /dev/null +++ b/tests/rmsd/test_rmsd.py @@ -0,0 +1,112 @@ +import unittest +from copy import deepcopy +from typing import List + +from rdkit.Geometry.rdGeometry import Point3D + +from icolos.core.containers.compound import Compound, Enumeration, unroll_conformers +from icolos.core.workflow_steps.calculation.rmsd import StepRMSD + +from icolos.utils.enums.step_enums import ( + StepBaseEnum, + StepRMSDEnum, + StepDataManipulationEnum, +) + +from tests.tests_paths import PATHS_EXAMPLEDATA, get_mol_as_Conformer + +_SBE = StepBaseEnum +_SR = StepRMSDEnum() +_SDM = StepDataManipulationEnum() + + +def _match_as_generic( + comp_list_1: List[Compound], comp_list_2: List[Compound] +) -> List[Compound]: + comp2_conf_unrolled = unroll_conformers(comp_list_2) + + # attach the second version of the conformers as generic field to the "real" input + # (as would be done by the data manipulator) + for comp in comp_list_1: + for enum in comp: + for conf in enum: + conf.add_extra_data( + key=_SDM.KEY_MATCHED, + data=[ + c + for c in comp2_conf_unrolled + if conf.get_index_string() == c.get_index_string() + ], + ) + return comp_list_1 + + +class Test_RMSD(unittest.TestCase): + @classmethod + def setUpClass(cls): + pass + + def setUp(self): + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + + # Compound 1 with 1 enumeration and 11 conformers + self.comp1 = Compound(compound_number=1) + self.comp1.add_enumeration(Enumeration(), auto_update=True) + self.comp1[0].add_conformers(deepcopy(conformers), auto_update=True) + + # Compound 2 with 1 enumeration and 11 conformers, change of some coordinates + self.comp2 = Compound(compound_number=1) + self.comp2.add_enumeration(Enumeration(), auto_update=True) + self.comp2[0].add_conformers(deepcopy(conformers), auto_update=True) + self.comp2[0][1].get_molecule().GetConformer().SetAtomPosition( + 0, Point3D(-4.2239, -0.441, 0.2458) + ) + self.comp2[0][7].get_molecule().GetConformer().SetAtomPosition( + 0, Point3D(-1.5442, -0.7854, 0.5883) + ) + + @classmethod + def tearDownClass(cls): + pass + + def test_RMSD_conformers_matched(self): + step_conf = { + _SBE.STEPID: "01_RMSD", + _SBE.STEP_TYPE: _SBE.STEP_RMSD, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: {_SR.METHOD: _SR.METHOD_ALIGNMOL}, + }, + } + + rf_step = StepRMSD(**step_conf) + rf_step.get_compounds().append(_match_as_generic([self.comp1], [self.comp2])[0]) + self.assertEqual(len(rf_step.get_compounds()[0][0][0].get_extra_data()), 1) + + rf_step.execute() + + self.assertEqual( + rf_step.get_compounds()[0][0][1].get_molecule().GetProp(_SR.RMSD_TAG), + "0.002", + ) + self.assertEqual( + rf_step.get_compounds()[0][0][1] + .get_extra_data()[_SDM.KEY_MATCHED][0] + .get_molecule() + .GetProp(_SR.RMSD_TAG), + "0.002", + ) + self.assertEqual( + rf_step.get_compounds()[0][0][3].get_molecule().GetProp(_SR.RMSD_TAG), "0.0" + ) + self.assertEqual( + rf_step.get_compounds()[0][0][3] + .get_extra_data()[_SDM.KEY_MATCHED][0] + .get_molecule() + .GetProp(_SR.RMSD_TAG), + "0.0", + ) + 
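# the RMSD step only annotates conformers with a tag; the conformer count itself must be unchanged +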
self.assertEqual(len(rf_step.get_compounds()[0][0].get_conformers()), 11) diff --git a/tests/schrodinger/__init__.py b/tests/schrodinger/__init__.py new file mode 100644 index 0000000..8ac707f --- /dev/null +++ b/tests/schrodinger/__init__.py @@ -0,0 +1,10 @@ +from tests.schrodinger.test_ligprep import * +from tests.schrodinger.test_glide import * +from tests.schrodinger.test_macromodel import * +from tests.schrodinger.test_fep_plus_setup import * +from tests.schrodinger.test_fep_plus_execution import * +from tests.schrodinger.test_fep_analysis import * +from tests.schrodinger.test_prepwizard import * +from tests.schrodinger.test_prime import * +from tests.schrodinger.test_desmond_production import * +from tests.schrodinger.test_desmond_setup import * diff --git a/tests/schrodinger/test_desmond_production.py b/tests/schrodinger/test_desmond_production.py new file mode 100644 index 0000000..445c1df --- /dev/null +++ b/tests/schrodinger/test_desmond_production.py @@ -0,0 +1,48 @@ +from icolos.core.containers.generic import GenericData +import unittest +from icolos.core.workflow_steps.schrodinger.desmond_exec import StepDesmondExec +from icolos.utils.general.files_paths import attach_root_path +import os +from tests.tests_paths import PATHS_EXAMPLEDATA + + +from icolos.utils.enums.step_enums import StepBaseEnum, StepDesmondEnum + +_SBE = StepBaseEnum +_SDE = StepDesmondEnum() + + +class Test_Desmond_Exec(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/schrodinger") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.DESMOND_SETUP_PDB), "rb") as f: + self.pdb = f.read() + + def test_desmond_production(self): + step_conf = { + _SBE.STEPID: "test_desmond_setup", + _SBE.STEP_TYPE: "desmond_preprocess", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws && $SCHRODINGER/jsc local-server-start" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: {}, + _SBE.SETTINGS_ADDITIONAL: {_SDE.CFG_FIELDS: {"time": "1"}}, + }, + } + + step_desmond_exec = StepDesmondExec(**step_conf) + step_desmond_exec.data.generic.add_file( + GenericData(file_name="structure.pdb", file_data=self.pdb, argument=True) + ) + step_desmond_exec.execute() + + out_path = os.path.join(self._test_dir, "out.cms") + step_desmond_exec.data.generic.write_out_all_files(self._test_dir) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 23587000) diff --git a/tests/schrodinger/test_desmond_setup.py b/tests/schrodinger/test_desmond_setup.py new file mode 100644 index 0000000..86d54a9 --- /dev/null +++ b/tests/schrodinger/test_desmond_setup.py @@ -0,0 +1,48 @@ +from icolos.core.containers.generic import GenericData +import unittest +from icolos.core.workflow_steps.schrodinger.desmond_preprocessor import StepDesmondSetup +from icolos.utils.general.files_paths import attach_root_path +import os +from tests.tests_paths import PATHS_EXAMPLEDATA + + +from icolos.utils.enums.step_enums import StepBaseEnum, StepDesmondEnum + +_SBE = StepBaseEnum +_SDE = StepDesmondEnum() + + +class Test_Desmond_Setup(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/schrodinger") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.DESMOND_SETUP_PDB), "r") as f: + self.pdb = f.read() + + def test_desmond_preprocess(self): + step_conf = { + 
_SBE.STEPID: "test_desmond_setup", + _SBE.STEP_TYPE: "desmond_preprocess", + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: {}, + _SBE.SETTINGS_ADDITIONAL: {_SDE.MSJ_FIELDS: {}}, + }, + } + + step_desmond_preprocess = StepDesmondSetup(**step_conf) + step_desmond_preprocess.data.generic.add_file( + GenericData(file_name="structure.pdb", file_data=self.pdb, argument=True) + ) + step_desmond_preprocess.execute() + + out_path = os.path.join(self._test_dir, "setup.cms") + step_desmond_preprocess.data.generic.write_out_all_files(self._test_dir) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 22560500) diff --git a/tests/schrodinger/test_fep_analysis.py b/tests/schrodinger/test_fep_analysis.py new file mode 100644 index 0000000..92ae420 --- /dev/null +++ b/tests/schrodinger/test_fep_analysis.py @@ -0,0 +1,105 @@ +from icolos.core.containers.generic import GenericData +import unittest +import os +from icolos.core.workflow_steps.schrodinger.fep_analysis import StepFepPlusAnalysis +from icolos.utils.enums.step_enums import StepBaseEnum, StepFepPlusEnum, StepGlideEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + get_ligands_as_compounds_with_conformers, + PATHS_1UYD, +) +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SFE = StepFepPlusEnum() +_SGE = StepGlideEnum() + + +class Test_FepPlusAnalysis(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/fep_plus") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.FEP_PLUS_MULTISIM_LONG), "r") as f: + self.log = f.read() + + self.mol = get_ligands_as_compounds_with_conformers( + attach_root_path(PATHS_1UYD.LIG_SDF) + ) + + def test_fep_analysis(self): + step_conf = { + _SBE.STEPID: "test_fep_analysis", + _SBE.STEP_TYPE: "fep_analysis", + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2020-4"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: {}, + _SBE.SETTINGS_ADDITIONAL: {_SFE.REFERENCE_DG: -10.76}, + }, + } + + step_fep_analysis = StepFepPlusAnalysis(**step_conf) + step_fep_analysis.data.compounds = self.mol + step_fep_analysis.data.generic.add_file( + GenericData( + file_name="test_multisim.log", file_data=self.log, argument=True + ) + ) + step_fep_analysis.execute() + # now confirm that the values have been parsed out of the log file properly + # true_conf_energies = ['2.67+-0.48', '0.00+-0.40', '2.86+-0.42', '8.88+-0.52', '3.09+-0.41'] + true_conf_energies = [ + -10.76, + -6.72, + -8.87, + -7.1, + -7.36, + -9.18, + -10.38, + -9.2, + -5.73, + -7.91, + -9.16, + -7.38, + -7.44, + -1.92, + -6.78, + -6.35, + -2.54, + -7.17, + -6.89, + -8.32, + -8.21, + -6.92, + -6.28, + -7.03, + -8.23, + -11.38, + -9.14, + -7.35, + -7.21, + -7.39, + -1.48, + -8.02, + -7.14, + -6.3, + -7.59, + -9.79, + -6.84, + -7.1, + ] + conformer_energies = [] + for compound in step_fep_analysis.data.compounds: + conformer_energies.append( + compound.get_enumerations()[0] + .get_conformers()[0] + .get_molecule() + .GetProp("map_dG") + ) + for idx, energy in enumerate(conformer_energies): + self.assertAlmostEqual( + float(energy.split("+-")[0]), true_conf_energies[idx], 2 + ) diff --git a/tests/schrodinger/test_fep_plus_execution.py b/tests/schrodinger/test_fep_plus_execution.py new file mode 100644 index 0000000..cd58eaa --- /dev/null +++ b/tests/schrodinger/test_fep_plus_execution.py 
@@ -0,0 +1,124 @@ +from icolos.core.containers.generic import GenericContainer, GenericData +import unittest +import os +from icolos.core.workflow_steps.schrodinger.fep_plus_execution import StepFepPlusExec +from icolos.utils.enums.step_enums import StepBaseEnum, StepFepPlusEnum, StepGlideEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + get_ligands_as_compounds_with_conformers, + PATHS_1UYD, +) +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SFE = StepFepPlusEnum() +_SGE = StepGlideEnum() + + +class Test_FepPlusExec(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/fep_plus") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.FEP_PLUS_EXAMPLE_FMP), "rb") as f: + self.fmp_in = f.read() + with open( + attach_root_path(PATHS_EXAMPLEDATA.FEP_PLUS_EXAMPLE_FMP_OUT), "rb" + ) as f: + self.fmp_out = f.read() + + with open(attach_root_path(PATHS_EXAMPLEDATA.FEP_PLUS_MULTISIM_LONG), "r") as f: + self.log = f.read() + with open(attach_root_path(PATHS_EXAMPLEDATA.FEP_PLUS_DOCKING_PV), "rb") as f: + self.poseviewer = f.read() + + # for this example we need five compounds, they can be empty containers + self.mol = get_ligands_as_compounds_with_conformers( + attach_root_path(PATHS_1UYD.LIG_SDF) + ) + + def test_fep_exec(self): + step_conf = { + _SBE.STEPID: "test_fep_setup", + _SBE.STEP_TYPE: "fep_setup", + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2020-4"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: ["-WAIT", "-h"], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _SFE.HOST_FLAG: "localhost", + _SFE.JOBNAME_FLAG: "test", + }, + }, + _SBE.SETTINGS_ADDITIONAL: {_SFE.REFERENCE_DG: -10.76}, + }, + } + + step_fep_execution = StepFepPlusExec(**step_conf) + step_fep_execution.data.compounds = self.mol + step_fep_execution.data.generic.add_file( + GenericData(file_name="out.fmp", file_data=self.fmp_in, argument=True) + ) + step_fep_execution._unit_test_simulate_output(self.log, self.fmp_out) + out_path = os.path.join(self._test_dir, "test_out.fmp") + step_fep_execution.write_generic_by_extension(self._test_dir, "fmp") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 316857) + + # now confirm that the values have been parsed out of the log file properly + # true_conf_energies = ['2.67+-0.48', '0.00+-0.40', '2.86+-0.42', '8.88+-0.52', '3.09+-0.41'] + true_conf_energies = [ + -10.76, + -6.72, + -8.87, + -7.1, + -7.36, + -9.18, + -10.38, + -9.2, + -5.73, + -7.91, + -9.16, + -7.38, + -7.44, + -1.92, + -6.78, + -6.35, + -2.54, + -7.17, + -6.89, + -8.32, + -8.21, + -6.92, + -6.28, + -7.03, + -8.23, + -11.38, + -9.14, + -7.35, + -7.21, + -7.39, + -1.48, + -8.02, + -7.14, + -6.3, + -7.59, + -9.79, + -6.84, + -7.1, + ] + conformer_energies = [] + for compound in step_fep_execution.data.compounds: + conformer_energies.append( + compound.get_enumerations()[0] + .get_conformers()[0] + .get_molecule() + .GetProp("map_dG") + ) + for idx, energy in enumerate(conformer_energies): + self.assertAlmostEqual( + float(energy.split("+-")[0]), true_conf_energies[idx], 2 + ) diff --git a/tests/schrodinger/test_fep_plus_setup.py b/tests/schrodinger/test_fep_plus_setup.py new file mode 100644 index 0000000..8cad54d --- /dev/null +++ b/tests/schrodinger/test_fep_plus_setup.py @@ -0,0 +1,92 @@ +import unittest +import os +from icolos.core.workflow_steps.schrodinger.fep_plus_setup 
import StepFepPlusSetup +from icolos.utils.enums.step_enums import StepBaseEnum, StepGlideEnum, StepFepPlusEnum +from tests.tests_paths import PATHS_1UYD +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + get_docked_ligands_as_conformers, + get_ligands_as_compounds_with_conformers, +) +from icolos.utils.general.files_paths import attach_root_path, empty_output_dir + +_SBE = StepBaseEnum +_SGE = StepGlideEnum() +_SFE = StepFepPlusEnum() + + +class Test_FepPlusSetup(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/fep_plus") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(PATHS_EXAMPLEDATA.FEP_PLUS_DOCKING_PV, "rb") as f: + self.poseviewer = f.read() + self.mol1 = get_docked_ligands_as_conformers( + PATHS_1UYD.LIG4_POSES, poseviewer=self.poseviewer + ) + self.mol2 = get_ligands_as_compounds_with_conformers( + PATHS_1UYD.LIG_SDF, poseviewer=self.poseviewer + ) + empty_output_dir(self._test_dir) + + def test_fep_setup_with_xray(self): + step_conf = { + _SBE.STEPID: "test_fep_setup_with_xray", + _SBE.STEP_TYPE: _SBE.STEP_FEP_PLUS_SETUP, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SFE.XRAY_STRUCTURES: PATHS_1UYD.XRAY_STRUCTURES + }, + }, + } + step_fep_plus_setup = StepFepPlusSetup(**step_conf) + step_fep_plus_setup.data.compounds = self.mol2 + step_fep_plus_setup.execute() + + # now confirm that the map has been generated properly + out_path = os.path.join(self._test_dir, "xray_test_out.fmp") + step_fep_plus_setup.write_generic_by_extension( + path=os.path.join(self._test_dir, "xray_test_out.fmp"), + ext="fmp", + join=False, + ) + stat_inf = os.stat(out_path) + self.assertAlmostEqual(stat_inf.st_size, 821966, delta=500) + + def test_fep_setup(self): + step_conf = { + _SBE.STEPID: "test_fep_setup", + _SBE.STEP_TYPE: _SBE.STEP_FEP_PLUS_SETUP, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-1-js-aws" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}, + } + }, + } + + step_fep_plus_setup = StepFepPlusSetup(**step_conf) + step_fep_plus_setup.data.compounds = self.mol1 + step_fep_plus_setup.execute() + + # now confirm that the map has been generated properly + out_path = os.path.join(self._test_dir, "test_out.fmp") + step_fep_plus_setup.write_generic_by_extension( + path=os.path.join(self._test_dir, "test_out.fmp"), ext="fmp", join=False + ) + stat_inf = os.stat(out_path) + self.assertAlmostEqual(stat_inf.st_size, 848697, delta=500) diff --git a/tests/schrodinger/test_glide.py b/tests/schrodinger/test_glide.py new file mode 100644 index 0000000..acf99b6 --- /dev/null +++ b/tests/schrodinger/test_glide.py @@ -0,0 +1,511 @@ +import os +import time +import unittest + +from icolos.core.workflow_steps.schrodinger.glide import StepGlide + +from icolos.utils.enums.step_enums import StepBaseEnum, TokenGuardEnum, StepGlideEnum +from icolos.utils.enums.program_parameters import GlideEnum + +from tests.tests_paths import ( + PATHS_1UYD, + PATHS_EXAMPLEDATA, + get_1UYD_ligands_as_Compounds, +) +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SGE = StepGlideEnum() +_EE = GlideEnum() +_TE = TokenGuardEnum() + + +class Test_Glide(unittest.TestCase): + 
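# the tests below cover single-core, multi-core, maestro-in-file (constraints), ensemble-docking and poseviewer Glide configurations +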
@classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/Glide") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + self._1UYD_compounds = get_1UYD_ligands_as_Compounds( + abs_path=PATHS_1UYD.LIGANDS + ) + self.receptor_path = PATHS_1UYD.GRID_PATH + self.receptor_constraints_path = PATHS_1UYD.GRID_CONSTRAINTS_PATH + self.receptor_path_COX2 = PATHS_EXAMPLEDATA.PRIME_COX2_GRID + + @classmethod + def tearDownClass(cls): + pass + + def test_Glide_run(self): + step_conf = { + _SBE.STEPID: "01_glide", + _SBE.STEP_TYPE: _SBE.STEP_GLIDE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 4, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 2, + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {_EE.GLIDE_HOST: "cpu-only"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _EE.GLIDE_AMIDE_MODE: "trans", + _EE.GLIDE_EXPANDED_SAMPLING: "True", + _EE.GLIDE_GRIDFILE: [self.receptor_path], + _EE.GLIDE_NENHANCED_SAMPLING: "1", + _EE.GLIDE_POSE_OUTTYPE: _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB, + _EE.GLIDE_POSES_PER_LIG: "3", + _EE.GLIDE_POSTDOCK_NPOSE: "25", + _EE.GLIDE_POSTDOCKSTRAIN: "True", + _EE.GLIDE_PRECISION: "SP", + _EE.GLIDE_REWARD_INTRA_HBONDS: "True", + } + }, + }, + } + + glide_step = StepGlide(**step_conf) + glide_step.data.compounds = self._1UYD_compounds + + glide_step.execute() + self.assertEqual(len(glide_step.get_compounds()), 15) + self.assertEqual(len(glide_step.get_compounds()[0][0].get_conformers()), 3) + self.assertListEqual( + list( + glide_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-1.5198, 11.3439, 24.0245], + ) + + self.assertListEqual( + list( + glide_step.get_compounds()[14][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-2.1655, 12.4809, 24.137], + ) + self.assertEqual( + glide_step.get_compounds()[0][0][0] + .get_molecule() + .GetProp(_SBE.ANNOTATION_TAG_DOCKING_SCORE), + "-8.4349", + ) + self.assertEqual( + glide_step.get_compounds()[0][0][1] + .get_molecule() + .GetProp(_SBE.ANNOTATION_TAG_DOCKING_SCORE), + "-7.83118", + ) + self.assertEqual( + glide_step.get_compounds()[0][0][2] + .get_molecule() + .GetProp(_SBE.ANNOTATION_TAG_DOCKING_SCORE), + "-6.0089", + ) + + # check SDF write-out + out_path = os.path.join(self._test_dir, "glide_docked.sdf") + glide_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 209000) + + def test_Glide_run_parallelization_1core_singleton(self): + step_conf = { + _SBE.STEPID: "01_glide", + _SBE.STEP_TYPE: _SBE.STEP_GLIDE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 1, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {_EE.GLIDE_HOST: "cpu-only"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _EE.GLIDE_AMIDE_MODE: "trans", + _EE.GLIDE_EXPANDED_SAMPLING: "True", + _EE.GLIDE_GRIDFILE: [self.receptor_path], + _EE.GLIDE_NENHANCED_SAMPLING: "1", + _EE.GLIDE_POSE_OUTTYPE: _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB, + 
_EE.GLIDE_POSES_PER_LIG: "3", + _EE.GLIDE_POSTDOCK_NPOSE: "25", + _EE.GLIDE_POSTDOCKSTRAIN: "True", + _EE.GLIDE_PRECISION: "SP", + _EE.GLIDE_REWARD_INTRA_HBONDS: "True", + } + }, + }, + } + + compounds = self._1UYD_compounds[:3] + + glide_step = StepGlide(**step_conf) + glide_step.data.compounds = compounds + + # execute on one core and put all in one list + time_difference = time.time() + glide_step.execute() + time_difference = time.time() - time_difference + self.assertGreater(time_difference, 100) + self.assertGreater(325, time_difference) + self.assertEqual(len(glide_step.get_compounds()), 3) + self.assertEqual(len(glide_step.get_compounds()[0][0].get_conformers()), 3) + self.assertListEqual( + list( + glide_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-1.5198, 11.3439, 24.0245], + ) + + # check SDF write-out + out_path = os.path.join( + self._test_dir, "glide_docked_single_core_singleton_list.sdf" + ) + glide_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 50500) + + def test_Glide_run_1_core(self): + step_conf = { + _SBE.STEPID: "01_glide", + _SBE.STEP_TYPE: _SBE.STEP_GLIDE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 1}, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {_EE.GLIDE_HOST: "cpu-only"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _EE.GLIDE_AMIDE_MODE: "trans", + _EE.GLIDE_EXPANDED_SAMPLING: "True", + _EE.GLIDE_GRIDFILE: [self.receptor_path], + _EE.GLIDE_NENHANCED_SAMPLING: "1", + _EE.GLIDE_POSE_OUTTYPE: _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB, + _EE.GLIDE_POSES_PER_LIG: "3", + _EE.GLIDE_POSTDOCK_NPOSE: "25", + _EE.GLIDE_POSTDOCKSTRAIN: "True", + _EE.GLIDE_PRECISION: "SP", + _EE.GLIDE_REWARD_INTRA_HBONDS: "True", + } + }, + }, + } + + compounds = self._1UYD_compounds[:3] + + glide_step = StepGlide(**step_conf) + glide_step.data.compounds = compounds + + # execute on one core and put all in one list + time_difference = time.time() + glide_step.execute() + time_difference = time.time() - time_difference + self.assertGreater(325, time_difference) + self.assertEqual(len(glide_step.get_compounds()), 3) + self.assertEqual(len(glide_step.get_compounds()[0][0].get_conformers()), 3) + self.assertListEqual( + list( + glide_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-1.5198, 11.3439, 24.0245], + ) + + # check SDF write-out + out_path = os.path.join( + self._test_dir, "glide_docked_merged_list_3compounds.sdf" + ) + glide_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 50500) + + def test_Glide_run_parallelization_4cores(self): + step_conf = { + _SBE.STEPID: "01_glide", + _SBE.STEP_TYPE: _SBE.STEP_GLIDE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 4}, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {_EE.GLIDE_HOST: "cpu-only"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _EE.GLIDE_AMIDE_MODE: "trans", + _EE.GLIDE_EXPANDED_SAMPLING: "True", + _EE.GLIDE_GRIDFILE: 
[self.receptor_path], + _EE.GLIDE_NENHANCED_SAMPLING: "1", + _EE.GLIDE_POSE_OUTTYPE: _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB, + _EE.GLIDE_POSES_PER_LIG: "3", + _EE.GLIDE_POSTDOCK_NPOSE: "25", + _EE.GLIDE_POSTDOCKSTRAIN: "True", + _EE.GLIDE_PRECISION: "SP", + _EE.GLIDE_REWARD_INTRA_HBONDS: "True", + } + }, + }, + } + + compounds = self._1UYD_compounds[:3] + + glide_step = StepGlide(**step_conf) + glide_step.data.compounds = compounds + + # execute and put all in one list + time_difference = time.time() + glide_step.execute() + time_difference = time.time() - time_difference + self.assertGreater(150, time_difference) + self.assertEqual(len(glide_step.get_compounds()), 3) + self.assertEqual(len(glide_step.get_compounds()[0][0].get_conformers()), 3) + self.assertListEqual( + list( + glide_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-1.5198, 11.3439, 24.0245], + ) + + # check SDF write-out + out_path = os.path.join( + self._test_dir, "glide_docked_parallelized_3compounds.sdf" + ) + glide_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 50500) + + def test_Glide_run_parallelization_4cores_in_file_usage(self): + step_conf = { + _SBE.STEPID: "01_glide", + _SBE.STEP_TYPE: _SBE.STEP_GLIDE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 4}, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {_EE.GLIDE_HOST: "cpu-only"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _EE.GLIDE_AMIDE_MODE: "trans", + _EE.GLIDE_EXPANDED_SAMPLING: "True", + _EE.GLIDE_GRIDFILE: [self.receptor_constraints_path], + _EE.GLIDE_NENHANCED_SAMPLING: "1", + _EE.GLIDE_POSE_OUTTYPE: _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB, + _EE.GLIDE_POSES_PER_LIG: "3", + _EE.GLIDE_POSTDOCK_NPOSE: "25", + _EE.GLIDE_POSTDOCKSTRAIN: "True", + _EE.GLIDE_PRECISION: "SP", + _EE.GLIDE_REWARD_INTRA_HBONDS: "True", + }, + _SGE.MAESTRO_IN_FILE: { + _SGE.MAESTRO_IN_FILE_PATH: PATHS_EXAMPLEDATA.GLIDE_EXAMPLE_IN + }, + }, + }, + } + + compounds = self._1UYD_compounds[:3] + + glide_step = StepGlide(**step_conf) + glide_step.data.compounds = compounds + glide_step.execute() + + # execute on one core and put all in one list + self.assertEqual(len(glide_step.get_compounds()), 3) + self.assertEqual(len(glide_step.get_compounds()[0][0].get_conformers()), 3) + + # would be [-2.5618, 10.8202, 25.2644] without constraints + self.assertListEqual( + list( + glide_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [3.1229, 4.5141, 24.8603], + ) + + # check SDF write-out + out_path = os.path.join( + self._test_dir, "glide_docked_parallelized_3compounds.sdf" + ) + glide_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 50500) + + def test_Glide_run_parallelization_4cores_ensemble_docking(self): + step_conf = { + _SBE.STEPID: "01_glide", + _SBE.STEP_TYPE: _SBE.STEP_GLIDE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 4}, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {_EE.GLIDE_HOST: "cpu-only"}, 
+ }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _EE.GLIDE_AMIDE_MODE: "trans", + _EE.GLIDE_EXPANDED_SAMPLING: "True", + _EE.GLIDE_GRIDFILE: [ + self.receptor_path_COX2, + self.receptor_path, + ], + _EE.GLIDE_NENHANCED_SAMPLING: "1", + _EE.GLIDE_POSE_OUTTYPE: _EE.GLIDE_POSE_OUTTYPE_LIGANDLIB, + _EE.GLIDE_POSES_PER_LIG: "3", + _EE.GLIDE_POSTDOCK_NPOSE: "25", + _EE.GLIDE_POSTDOCKSTRAIN: "True", + _EE.GLIDE_PRECISION: "SP", + _EE.GLIDE_REWARD_INTRA_HBONDS: "True", + }, + _SBE.GRID_IDS: ["mygrid1", "mygrid2"], + }, + }, + } + + compounds = self._1UYD_compounds[:3] + + glide_step = StepGlide(**step_conf) + glide_step.data.compounds = compounds + + # execute on one core and put all in one list + glide_step.execute() + self.assertEqual(len(glide_step.get_compounds()), 3) + self.assertEqual(len(glide_step.get_compounds()[0][0].get_conformers()), 6) + self.assertListEqual( + list( + glide_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-1.5198, 11.3439, 24.0245], + ) + self.assertListEqual( + list( + glide_step.get_compounds()[0][0][5] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [7.3776, 55.7005, 70.3807], + ) + self.assertListEqual( + ["mygrid2", "mygrid2", "mygrid1", "mygrid2", "mygrid1", "mygrid1"], + [ + comp.get_molecule().GetProp(_SBE.ANNOTATION_GRID_ID) + for comp in list(glide_step.get_compounds()[0][0]) + ], + ) + + # check SDF write-out + out_path = os.path.join( + self._test_dir, "glide_docked_parallelized_ensemble_docking.sdf" + ) + glide_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 80000) + + def test_Glide_run_parallelization_poseviewer(self): + step_conf = { + _SBE.STEPID: "01_glide", + _SBE.STEP_TYPE: _SBE.STEP_GLIDE, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: {_SBE.EXEC_PARALLELIZATION_CORES: 4}, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {_EE.GLIDE_HOST: "cpu-only"}, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SGE.CONFIGURATION: { + _EE.GLIDE_AMIDE_MODE: "trans", + _EE.GLIDE_EXPANDED_SAMPLING: "True", + _EE.GLIDE_GRIDFILE: [self.receptor_path], + _EE.GLIDE_NENHANCED_SAMPLING: "1", + _EE.GLIDE_POSE_OUTTYPE: _EE.GLIDE_POSE_OUTTYPE_POSEVIEWER, + _EE.GLIDE_POSES_PER_LIG: "3", + _EE.GLIDE_POSTDOCK_NPOSE: "25", + _EE.GLIDE_POSTDOCKSTRAIN: "True", + _EE.GLIDE_PRECISION: "SP", + _EE.GLIDE_REWARD_INTRA_HBONDS: "True", + } + }, + }, + } + + compounds = self._1UYD_compounds[:3] + + glide_step = StepGlide(**step_conf) + glide_step.data.compounds = compounds + + # execute on one core and put all in one list + glide_step.execute() + + self.assertEqual(len(glide_step.get_compounds()), 3) + self.assertEqual(len(glide_step.get_compounds()[0][0].get_conformers()), 3) + self.assertListEqual( + list( + glide_step.get_compounds()[0][0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-1.5198, 11.3439, 24.0245], + ) + + # check SDF write-out + out_path = os.path.join( + self._test_dir, "glide_docked_parallelized_3compounds_pv.sdf" + ) + glide_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 50500) diff --git a/tests/schrodinger/test_ligprep.py b/tests/schrodinger/test_ligprep.py new file mode 100644 index 0000000..c8c7ef3 --- /dev/null +++ b/tests/schrodinger/test_ligprep.py @@ -0,0 
+1,220 @@ +import unittest + +from icolos.core.workflow_steps.schrodinger.ligprep import StepLigprep +from icolos.utils.enums.step_enums import StepBaseEnum, TokenGuardEnum, StepLigprepEnum +from icolos.utils.enums.program_parameters import LigprepEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + get_mol_as_Compound, + get_test_Compounds_without_molecules, +) +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_LSBE = StepLigprepEnum() +_LIE = LigprepEnum() +_TE = TokenGuardEnum() + + +class Test_Ligprep(unittest.TestCase): + @classmethod + def setUpClass(cls): + pass + + def setUp(self): + self._paracetamol_molecule = get_mol_as_Compound( + attach_root_path(PATHS_EXAMPLEDATA.PARACETAMOL_PATH), compound_number=0 + ) + self._aspirin_molecule = get_mol_as_Compound( + attach_root_path(PATHS_EXAMPLEDATA.ASPIRIN_PATH), compound_number=1 + ) + self._Aspirin = get_test_Compounds_without_molecules(compound_numbers=[2])[ + "Aspirin" + ] + + @classmethod + def tearDownClass(cls): + pass + + def test_LigPrep_run(self): + step_conf = { + _SBE.STEPID: "01_ligprep", + _SBE.STEP_TYPE: _SBE.STEP_LIGPREP, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2020-4", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 2, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 2, + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 2}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _LIE.LIGPREP_F: "/a/path/to/be/ignored/filter.txt" + } + } + }, + } + + ligprep_step = StepLigprep(**step_conf) + ligprep_step.data.compounds = [ + self._paracetamol_molecule, + self._aspirin_molecule, + self._Aspirin, + ] + + ligprep_step.execute() + self.assertEqual( + ["0:0", "1:0", "2:0"], + [ + enum.get_index_string() + for comp in ligprep_step.get_compounds() + for enum in comp + ], + ) + self.assertEqual( + [ + "[H]c1c([H])c(Cl)c2c(=O)nc(N([H])C(=O)C([H])([H])[H])sc2c1[H]", + "[H]c1c([H])c(Cl)c2c(=O)nc(C(=O)[O-])sc2c1[H]", + "O=C(C)Oc1ccccc1C(=O)O", + ], + [ + enum.get_original_smile() + for comp in ligprep_step.get_compounds() + for enum in comp + ], + ) + self.assertEqual( + [ + "[H]c1c([H])c(Cl)c2c(=O)nc(N([H])C(=O)C([H])([H])[H])sc2c1[H]", + "[H]OC(=O)c1nc(=O)c2c(Cl)c([H])c([H])c([H])c2s1", + "[H]OC(=O)c1c([H])c([H])c([H])c([H])c1OC(=O)C([H])([H])[H]", + ], + [ + enum.get_smile() + for comp in ligprep_step.get_compounds() + for enum in comp + ], + ) + self.assertListEqual( + list( + ligprep_step.get_compounds()[0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-4.9037, 3.0725, 2.0034], + ) + self.assertListEqual( + list( + ligprep_step.get_compounds()[1][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-4.8794, 3.0688, -2.0104], + ) + self.assertListEqual( + list( + ligprep_step.get_compounds()[2][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [0.0243, 2.4719, -0.3164], + ) + + def test_LigPrep_run_EPIK_stereo_filtering(self): + step_conf = { + _SBE.STEPID: "01_ligprep", + _SBE.STEP_TYPE: _SBE.STEP_LIGPREP, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2020-4", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 2, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 2, + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 2}, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [_LIE.LIGPREP_EPIK], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + _LIE.LIGPREP_PH: 7.0, + 
_LIE.LIGPREP_PHT: 2.0, + _LIE.LIGPREP_S: 10, + _LIE.LIGPREP_BFF: 14, + }, + }, + _SBE.SETTINGS_ADDITIONAL: {_LSBE.FILTER_FILE: {"Total_charge": "!= 0"}}, + }, + } + + ligprep_step = StepLigprep(**step_conf) + ligprep_step.data.compounds = [ + self._paracetamol_molecule, + self._aspirin_molecule, + self._Aspirin, + ] + + ligprep_step.execute() + self.assertEqual( + ["0:0", "0:1", "1:0"], + [ + enum.get_index_string() + for comp in ligprep_step.get_compounds() + for enum in comp + ], + ) + self.assertEqual( + [ + "[H]c1c([H])c(Cl)c2c(=O)nc(N([H])C(=O)C([H])([H])[H])sc2c1[H]", + "[H]c1c([H])c(Cl)c2c(=O)nc(N([H])C(=O)C([H])([H])[H])sc2c1[H]", + "[H]c1c([H])c(Cl)c2c(=O)nc(C(=O)[O-])sc2c1[H]", + ], + [ + enum.get_original_smile() + for comp in ligprep_step.get_compounds() + for enum in comp + ], + ) + self.assertEqual( + [ + "[H]c1c([H])c(Cl)c2c(=O)n([H])/c(=N\\C(=O)C([H])([H])[H])sc2c1[H]", + "[H]c1c([H])c(Cl)c2c(=O)nc(N([H])C(=O)C([H])([H])[H])sc2c1[H]", + "[H]c1c([H])c(Cl)c2c(=O)[n+]([H])c(C(=O)[O-])sc2c1[H]", + ], + [ + enum.get_smile() + for comp in ligprep_step.get_compounds() + for enum in comp + ], + ) + self.assertListEqual( + list( + ligprep_step.get_compounds()[0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-4.7828, 5.0389, -2.1622], + ) + self.assertListEqual( + list( + ligprep_step.get_compounds()[0][1] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-4.9037, 3.0725, 2.0034], + ) + self.assertListEqual( + list( + ligprep_step.get_compounds()[1][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [-5.2155, 3.215, -1.1152], + ) diff --git a/tests/schrodinger/test_macromodel.py b/tests/schrodinger/test_macromodel.py new file mode 100644 index 0000000..d9f358b --- /dev/null +++ b/tests/schrodinger/test_macromodel.py @@ -0,0 +1,93 @@ +import unittest +import os + +from icolos.core.workflow_steps.schrodinger.macromodel import StepMacromodel + +from icolos.utils.enums.step_enums import StepBaseEnum, TokenGuardEnum +from icolos.utils.enums.program_parameters import MacromodelEnum + +from tests.tests_paths import PATHS_EXAMPLEDATA, get_mol_as_Compound +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_CE = MacromodelEnum() +_TE = TokenGuardEnum() + + +class Test_Macromodel_confgen(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/MacroModel") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + self._paracetamol_molecule = get_mol_as_Compound( + attach_root_path(PATHS_EXAMPLEDATA.PARACETAMOL_PATH) + ) + self._aspirin_molecule = get_mol_as_Compound( + attach_root_path(PATHS_EXAMPLEDATA.ASPIRIN_PATH) + ) + + @classmethod + def tearDownClass(cls): + pass + + def test_MacroModel_run(self): + step_conf = { + _SBE.STEPID: "01_macromodel", + _SBE.STEP_TYPE: _SBE.STEP_MACROMODEL, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2020-4"}, + _TE.TG: { + _TE.TG_PREFIX_EXECUTION: "module load schrodinger/2020-4", + _TE.TG_TOKEN_POOLS: {"MMOD_MACROMODEL": 2}, + _TE.TG_WAIT_INTERVAL_SECONDS: 30, + _TE.TG_WAIT_LIMIT_SECONDS: 900, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [_CE.MACROMODEL_WAIT], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {_CE.MACROMODEL_NJOBS: 2}, + } + }, + } + + mm_step = StepMacromodel(**step_conf) + mm_step.data.compounds = [self._paracetamol_molecule] + + # conformer coordinates should not be touched by the execution + self.assertListEqual( + 
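+            # note: [0][0] addresses the enumeration's input molecule here, not a
+            # generated conformer; MacroModel attaches its conformers separately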
list(
+                mm_step.get_compounds()[0][0]
+                .get_molecule()
+                .GetConformer(0)
+                .GetPositions()[0]
+            ),
+            [-3.8276, -1.0625, 0.3279],
+        )
+        mm_step.execute()
+        self.assertListEqual(
+            list(
+                mm_step.get_compounds()[0][0]
+                .get_molecule()
+                .GetConformer(0)
+                .GetPositions()[0]
+            ),
+            [-3.8276, -1.0625, 0.3279],
+        )
+        self.assertEqual(len(mm_step.get_compounds()[0][0].get_conformers()), 10)
+        self.assertEqual(
+            list(
+                mm_step.get_compounds()[0][0][0]
+                .get_molecule()
+                .GetConformer(0)
+                .GetPositions()[0]
+            ),
+            [-4.2269, -0.441, 0.2359],
+        )
+
+        # check write-out
+        out_path = os.path.join(self._test_dir, "macromodel_output_file.sdf")
+        mm_step.write_conformers(out_path)
+        stat_inf = os.stat(out_path)
+        self.assertEqual(stat_inf.st_size, 25637)
diff --git a/tests/schrodinger/test_prepwizard.py b/tests/schrodinger/test_prepwizard.py
new file mode 100644
index 0000000..95511da
--- /dev/null
+++ b/tests/schrodinger/test_prepwizard.py
@@ -0,0 +1,118 @@
+import unittest
+import os
+
+from icolos.utils.enums.step_enums import StepBaseEnum, StepGromacsEnum, StepPrepwizEnum
+from icolos.core.workflow_steps.schrodinger.prepwizard import StepPrepwizard
+from icolos.core.containers.generic import GenericData
+from icolos.utils.general.files_paths import attach_root_path
+from tests.tests_paths import (
+    PATHS_1UYD,
+    PATHS_EXAMPLEDATA,
+)
+
+_SGE = StepGromacsEnum()
+_SBE = StepBaseEnum
+_SPE = StepPrepwizEnum()
+
+
+class Test_Prepwizard(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls._test_dir = attach_root_path("tests/junk/prepwizard")
+        if not os.path.isdir(cls._test_dir):
+            os.makedirs(cls._test_dir)
+
+    def setUp(self):
+        with open(PATHS_1UYD.PDB_PATH, "r") as f:
+            data = f.read()
+        self.GenericData = GenericData(file_name="test_structure.pdb", file_data=data)
+        with open(PATHS_EXAMPLEDATA.DESMOND_SETUP_PDB, "r") as f:
+            self.cox = f.read()
+
+    def test_prepwizard(self):
+        step_conf = {
+            _SBE.STEPID: "01_prepwizard",
+            _SBE.STEP_TYPE: _SBE.STEP_PREPWIZARD,
+            _SBE.EXEC: {
+                _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2020-4",
+            },
+            _SBE.SETTINGS: {
+                _SBE.SETTINGS_ARGUMENTS: {_SBE.SETTINGS_ARGUMENTS_PARAMETERS: {}}
+            },
+        }
+
+        prepwiz_step = StepPrepwizard(**step_conf)
+        prepwiz_step.data.generic.add_file(self.GenericData)
+        prepwiz_step.execute()
+
+        out_file = prepwiz_step.data.generic.get_files_by_extension("pdb")[0].get_data()
+        out_path = os.path.join(self._test_dir, "test_out.pdb")
+        with open(out_path, "w") as f:
+            f.write(out_file)
+        stat_inf = os.stat(out_path)
+        self.assertEqual(stat_inf.st_size, 53635)
+
+    def test_remove_ligand(self):
+        step_conf = {
+            _SBE.STEPID: "test_rem",
+            _SBE.STEP_TYPE: _SBE.STEP_PREPWIZARD,
+            _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "ml schrodinger"},
+            _SBE.SETTINGS: {
+                _SBE.SETTINGS_ADDITIONAL: {_SPE.REMOVE_RES: ["S58"]},
+            },
+        }
+
+        step_removelig = StepPrepwizard(**step_conf)
+        step_removelig.data.generic.add_file(
+            GenericData(file_name="cox.pdb", file_data=self.cox, argument=True)
+        )
+
+        step_removelig.execute()
+        out_path = os.path.join(self._test_dir, "cox.pdb")
+        step_removelig.write_generic_by_extension(
+            self._test_dir,
+            _SGE.PROTEIN_PDB,
+        )
+
+        out_file = step_removelig.data.generic.get_files_by_extension("pdb")[
+            0
+        ].get_data()
+        with open(out_path, "w") as f:
+            f.write(out_file)
+        stat_inf = os.stat(out_path)
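+        # S58 is the co-crystallised SC-558 inhibitor of the COX-2 structure used
+        # here (assumed to be PDB 1CX2, judging from the example file name)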
self.assertGreater(stat_inf.st_size, 738100) + + def test_auto_remove_ligand(self): + step_conf = { + _SBE.STEPID: "test_rem", + _SBE.STEP_TYPE: _SBE.STEP_PREPWIZARD, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws" + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ADDITIONAL: {_SPE.REMOVE_RES: "ligands"}, + }, + } + + step_removelig = StepPrepwizard(**step_conf) + step_removelig.data.generic.add_file( + GenericData(file_name="cox.pdb", file_data=self.cox, argument=True) + ) + + step_removelig.execute() + out_path = os.path.join(self._test_dir, "cox_auto.pdb") + step_removelig.write_generic_by_extension( + self._test_dir, + _SGE.PROTEIN_PDB, + ) + + out_file = step_removelig.data.generic.get_files_by_extension("pdb")[ + 0 + ].get_data() + with open(out_path, "w") as f: + f.write(out_file) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 724500) diff --git a/tests/schrodinger/test_prime.py b/tests/schrodinger/test_prime.py new file mode 100644 index 0000000..ef3a631 --- /dev/null +++ b/tests/schrodinger/test_prime.py @@ -0,0 +1,195 @@ +import unittest +import os + +from icolos.core.workflow_steps.schrodinger.prime import StepPrime + +from icolos.utils.enums.step_enums import StepBaseEnum, StepPrimeEnum, TokenGuardEnum +from icolos.utils.enums.program_parameters import PrimeEnum + +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + get_mol_as_Compound, + get_ligands_as_compounds_with_conformers, +) +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SPE = StepPrimeEnum() +_CE = PrimeEnum() +_TE = TokenGuardEnum() + + +class Test_Prime(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/prime_test") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.PRIME_POSEVIEWER), "rb") as f: + self._poseviewer = f.read() + self._molecule = get_mol_as_Compound( + attach_root_path(PATHS_EXAMPLEDATA.PRIME_DOCKED_LIGAND_SDF) + ) + self._conformers = get_ligands_as_compounds_with_conformers( + attach_root_path(PATHS_EXAMPLEDATA.LIGANDS_1UYD) + ) + + @classmethod + def tearDownClass(cls): + pass + + def test_Prime_run(self): + # TODO: make sure the original execution mode (on enumerations) works ok + # * Pull the molecule from the enumeration if no conformers attached + # * add conformer to the enum at the end + step_conf = { + _SBE.STEPID: "01_prime", + _SBE.STEP_TYPE: _SBE.STEP_PRIME, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 4, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 2, + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _TE.TG: { + _TE.TG_PREFIX_EXECUTION: "module load schrodinger/2021-2-js-aws", + _TE.TG_TOKEN_POOLS: {"PRIMEX_MAIN": 8}, + _TE.TG_WAIT_INTERVAL_SECONDS: 30, + _TE.TG_WAIT_LIMIT_SECONDS: 900, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-prime_opt": "OPLS_VERSION=OPLS3e", + "-HOST": "cpu-only", + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SPE.RECEPTOR: attach_root_path(PATHS_EXAMPLEDATA.RECEPTOR_1UYD) + }, + }, + } + + prime_step = StepPrime(**step_conf) + prime_step.data.compounds = [self._molecule] + prime_step.execute() + + self.assertEqual(len(prime_step.get_compounds()[0][0].get_conformers()), 1) + # molecule coordinates should not be touched by the 
execution (conformer is optimized though) + self.assertListEqual( + list( + prime_step.get_compounds()[0][0] + .get_molecule() + .GetConformer(0) + .GetPositions()[0] + ), + [15.2886, 52.7, 69.7128], + ) + + # check write-out + out_path = os.path.join(self._test_dir, "prime_output_file.sdf") + prime_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 10000) + self.assertGreater(13500, stat_inf.st_size) + + def test_prime_run_conformers(self): + step_conf = { + _SBE.STEPID: "01_prime", + _SBE.STEP_TYPE: _SBE.STEP_PRIME, + _SBE.EXEC: { + _SBE.EXEC_PREFIXEXECUTION: "module load schrodinger/2021-2-js-aws", + _SBE.EXEC_PARALLELIZATION: { + _SBE.EXEC_PARALLELIZATION_CORES: 32, + _SBE.EXEC_PARALLELIZATION_MAXLENSUBLIST: 1, + }, + _SBE.EXEC_FAILUREPOLICY: {_SBE.EXEC_FAILUREPOLICY_NTRIES: 1}, + }, + _TE.TG: { + _TE.TG_PREFIX_EXECUTION: "module load schrodinger/2021-2-js-aws", + _TE.TG_TOKEN_POOLS: {"PRIMEX_MAIN": 8}, + _TE.TG_WAIT_INTERVAL_SECONDS: 30, + _TE.TG_WAIT_LIMIT_SECONDS: 900, + }, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-prime_opt": "OPLS_VERSION=OPLS3e", + "-HOST": "cpu-only", + }, + }, + _SBE.SETTINGS_ADDITIONAL: { + _SPE.RECEPTOR: attach_root_path(PATHS_EXAMPLEDATA.RECEPTOR_1UYD) + }, + }, + } + prime_step = StepPrime(**step_conf) + prime_step.data.compounds = self._conformers + prime_step.execute() + out_path = os.path.join(self._test_dir, "prime_conformers_output.sdf") + prime_step.write_conformers(out_path) + scores = [ + "-46.4912412523436", + "-49.5744863214668", + "-63.4520243626994", + "-55.2546247037599", + "-35.0457131568983", + "-37.584671678831", + "-52.3315306739823", + "-42.1457765778323", + "-39.0962071597705", + "-46.9267618228951", + "-41.4015029031088", + "-49.0027294452047", + "-45.297078493255", + "-47.1669750502297", + "-50.2110899116497", + "-38.8494636817877", + "-41.6326792228592", + "-43.6924482130898", + "-46.738882435201", + "-45.242419676907", + "-36.5693940219298", + "-57.9606138506851", + "-55.4918326231546", + "-39.724716804717", + "-50.0105377772616", + "-46.9162249942074", + "-46.2790546176639", + "-43.8232309398354", + "-49.7540870967205", + "-53.7133446915177", + "-51.6633994627191", + "-54.2858218610409", + "-42.9129639283819", + "-49.1980564160085", + "-52.7421500005312", + "-50.953927771995", + "-59.8079546364734", + "-53.20869108637", + "-42.9971732771755", + "-46.3393621442165", + "-39.1124509414121", + "-26.9291589283248", + "-48.0546634882376", + "-58.0973312599281", + "-52.8690868697358", + ] + flattened_conformers_scores = [] + for compound in prime_step.data.compounds: + for enumeration in compound.get_enumerations(): + for conformer in enumeration.get_conformers(): + flattened_conformers_scores.append( + conformer.get_molecule().GetProp(_SPE.MMGBSA_SCORE) + ) + # self.assertEqual(float(prime_step.get_compounds()[0].get_enumerations()[0].get_conformers()[0].get_molecule()\ + # .GetProp('r_psp_MMGBSA_dG_Bind')), -69.9651350867098) + + for trial, value in zip(flattened_conformers_scores, scores): + self.assertEqual(round(float(trial)), round(float(value))) diff --git a/tests/shaep/__init__.py b/tests/shaep/__init__.py new file mode 100644 index 0000000..4f7a309 --- /dev/null +++ b/tests/shaep/__init__.py @@ -0,0 +1 @@ +from tests.shaep.test_shaep import * diff --git a/tests/shaep/test_shaep.py b/tests/shaep/test_shaep.py new file mode 100644 index 0000000..fa63496 --- /dev/null +++ b/tests/shaep/test_shaep.py 
@@ -0,0 +1,72 @@ +from icolos.core.containers.generic import GenericData +from icolos.utils.enums.program_parameters import ShaepEnum +from tests.tests_paths import ( + PATHS_EXAMPLEDATA, + get_mol_as_Compound, + get_mol_as_Conformer, + MAIN_CONFIG, +) +import unittest +import os + +from icolos.utils.enums.step_enums import StepBaseEnum, StepShaepEnum +from icolos.core.workflow_steps.calculation.shaep import StepShaep +from icolos.utils.general.files_paths import attach_root_path + +_SBE = StepBaseEnum +_SSE = StepShaepEnum() +_SE = ShaepEnum() + + +class Test_Shaep(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/shaep") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + # TODO: update to load at least 3 compounds docked (at least 5 poses each) + mol = get_mol_as_Compound(PATHS_EXAMPLEDATA.SHAEP_LIGAND_DOCKED_POSE) + conf = get_mol_as_Conformer(PATHS_EXAMPLEDATA.SHAEP_LIGAND_DOCKED_POSE) + mol[0].add_conformers(conf, auto_update=True) + self.mol = mol + + with open(PATHS_EXAMPLEDATA.PANTHER_NEGATIVE_IMAGE, "r") as f: + self.negative_image = f.read() + + def test_shaep(self): + step_conf = { + _SBE.STEPID: "01_shaep", + _SBE.STEP_TYPE: _SBE.STEP_SHAEP, + _SBE.EXEC: {_SBE.EXEC_BINARYLOCATION: MAIN_CONFIG["SHAEP_LOCATION"]}, + } + shaep_step = StepShaep(**step_conf) + shaep_step.data.compounds = [self.mol] + shaep_step.data.generic.add_file( + GenericData(file_name="neg_image.mol2", file_data=self.negative_image) + ) + shaep_step.execute() + + self.assertEqual( + float( + shaep_step.get_compounds()[0][0][0] + .get_molecule() + .GetProp(_SE.TAG_SHAPE_SIMILARITY) + ), + 0.737409, + ) + self.assertEqual( + float( + shaep_step.get_compounds()[0][0][0] + .get_molecule() + .GetProp(_SE.TAG_ESP_SIMILARITY) + ), + 0.106811, + ) + + # check, whether the tags got added + out_path = os.path.join(self._test_dir, "mols_nibr.sdf") + shaep_step.write_conformers(out_path) + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 17358) diff --git a/tests/step_utils/__init__.py b/tests/step_utils/__init__.py new file mode 100644 index 0000000..053700f --- /dev/null +++ b/tests/step_utils/__init__.py @@ -0,0 +1,4 @@ +from tests.step_utils.test_input_merger import * +from tests.step_utils.test_input_preparator import * +from tests.step_utils.test_run_variables_resolver import * +from tests.step_utils.test_writeout import * diff --git a/tests/step_utils/test_input_merger.py b/tests/step_utils/test_input_merger.py new file mode 100644 index 0000000..7c33b4d --- /dev/null +++ b/tests/step_utils/test_input_merger.py @@ -0,0 +1,262 @@ +import unittest + +from icolos.core.step_utils.input_merger import InputMerger, StepMerge +from icolos.core.containers.compound import Conformer, Enumeration, Compound + +from icolos.utils.enums.step_enums import StepBaseEnum + +_SBE = StepBaseEnum + + +class Test_InputMerger(unittest.TestCase): + @classmethod + def setUpClass(cls): + pass + + def setUp(self): + # comp1 has 2 enumerations, one with 2 and one with 3 conformers + comp1 = Compound(name="test_molecule", compound_number=0) + comp1_enum1 = Enumeration(smile="abc", molecule=None, enumeration_id=1) + comp1_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True) + comp1_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True) + comp1_enum2 = Enumeration(smile="def", molecule=None, enumeration_id=2) + comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True) + 
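+        # note: auto_update=True appears to renumber the conformer ID sequentially
+        # and register the parent container, so repeated conformer_id=0 inputs
+        # still yield index strings like "0:1:0", "0:1:1" (compound:enum:conformer)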
comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp1.add_enumeration(comp1_enum1, auto_update=False)
+        comp1.add_enumeration(comp1_enum2, auto_update=False)
+
+        # comp2 has 3 enumerations, one with 2, one with 3 and one with 4 conformers
+        comp2 = Compound(name="test_molecule_new", compound_number=0)
+        comp2_enum1 = Enumeration(smile="kk", molecule=None, enumeration_id=0)
+        comp2_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum2 = Enumeration(smile="abc", molecule=None, enumeration_id=1)
+        comp2_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum3 = Enumeration(smile="xyz", molecule=None, enumeration_id=2)
+        comp2_enum3.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum3.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum3.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum3.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2.add_enumeration(comp2_enum1, auto_update=False)
+        comp2.add_enumeration(comp2_enum2, auto_update=False)
+        comp2.add_enumeration(comp2_enum3, auto_update=False)
+
+        # comp3 has 2 enumerations, one with 2 and one with 3 conformers
+        # (same name as comp1, but a different compound number)
+        comp3 = Compound(name="test_molecule", compound_number=1)
+        comp3_enum1 = Enumeration(smile="abc", molecule=None, enumeration_id=0)
+        comp3_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp3_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp3_enum2 = Enumeration(smile="def", molecule=None, enumeration_id=1)
+        comp3_enum2.add_conformer(Conformer(conformer_id=0), auto_update=False)
+        comp3_enum2.add_conformer(Conformer(conformer_id=0), auto_update=False)
+        comp3_enum2.add_conformer(Conformer(conformer_id=0), auto_update=False)
+        comp3.add_enumeration(comp3_enum1, auto_update=False)
+        comp3.add_enumeration(comp3_enum2, auto_update=False)
+        self.list_compounds = [comp1, comp2, comp3]
+
+    @classmethod
+    def tearDownClass(cls):
+        pass
+
+    def test_merging_by_name_compound(self):
+        conf = {
+            _SBE.INPUT_MERGE_COMPOUNDS: True,
+            _SBE.INPUT_MERGE_COMPOUNDS_BY: _SBE.INPUT_MERGE_BY_NAME,
+            _SBE.INPUT_MERGE_ENUMERATIONS: False,
+        }
+        conf = StepMerge(**conf)
+        merger = InputMerger(conf)
+        list_compounds = merger.merge(self.list_compounds)
+
+        self.assertEqual(len(list_compounds), 2)
+        self.assertEqual(len(list_compounds[0].get_enumerations()), 4)
+        self.assertEqual(len(list_compounds[1].get_enumerations()), 3)
+
+        self.assertListEqual(
+            [c.get_name() for c in list_compounds],
+            ["test_molecule", "test_molecule_new"],
+        )
+        self.assertListEqual(
+            [
+                conf.get_index_string()
+                for c in list_compounds
+                for e in c.get_enumerations()
+                for conf in e.get_conformers()
+            ],
+            [
+                "0:0:0",
+                "0:0:1",
+                "0:1:0",
+                "0:1:1",
+                "0:1:2",
+                "0:2:0",
+                "0:2:1",
+                "0:3:0",
+                "0:3:1",
+                "0:3:2",
+                "1:0:0",
+                "1:0:1",
+                "1:1:0",
+                "1:1:1",
+                "1:1:2",
+                "1:2:0",
+                "1:2:1",
+                "1:2:2",
+                "1:2:3",
+            ],
+        )
+
+    def test_merging_by_id_compound(self):
+        conf = {
+            _SBE.INPUT_MERGE_COMPOUNDS: True,
+            _SBE.INPUT_MERGE_COMPOUNDS_BY: _SBE.INPUT_MERGE_BY_ID,
+            _SBE.INPUT_MERGE_ENUMERATIONS: False,
+        }
+        conf = StepMerge(**conf)
+        merger = InputMerger(conf)
+        list_compounds =
merger.merge(self.list_compounds) + + self.assertEqual(len(list_compounds), 2) + self.assertEqual(len(list_compounds[0].get_enumerations()), 5) + self.assertEqual(len(list_compounds[1].get_enumerations()), 2) + + self.assertListEqual([c.get_name() for c in list_compounds], ["0", "1"]) + + self.assertListEqual( + [ + conf.get_index_string() + for c in list_compounds + for e in c.get_enumerations() + for conf in e.get_conformers() + ], + [ + "0:0:0", + "0:0:1", + "0:1:0", + "0:1:1", + "0:1:2", + "0:2:0", + "0:2:1", + "0:3:0", + "0:3:1", + "0:3:2", + "0:4:0", + "0:4:1", + "0:4:2", + "0:4:3", + "1:0:0", + "1:0:1", + "1:1:0", + "1:1:1", + "1:1:2", + ], + ) + + def test_merging_by_name_compound_enumeration_smile(self): + conf = { + _SBE.INPUT_MERGE_COMPOUNDS: True, + _SBE.INPUT_MERGE_COMPOUNDS_BY: _SBE.INPUT_MERGE_BY_NAME, + _SBE.INPUT_MERGE_ENUMERATIONS: True, + _SBE.INPUT_MERGE_ENUMERATIONS_BY: _SBE.INPUT_MERGE_BY_SMILE, + } + conf = StepMerge(**conf) + merger = InputMerger(conf) + list_compounds = merger.merge(self.list_compounds) + + self.assertEqual(len(list_compounds), 2) + self.assertEqual(len(list_compounds[0].get_enumerations()), 2) + self.assertEqual(len(list_compounds[1].get_enumerations()), 3) + + self.assertListEqual( + [c.get_name() for c in list_compounds], + ["test_molecule", "test_molecule_new"], + ) + self.assertListEqual( + [ + conf.get_index_string() + for c in list_compounds + for e in c.get_enumerations() + for conf in e.get_conformers() + ], + [ + "0:0:0", + "0:0:1", + "0:0:2", + "0:0:3", + "0:1:0", + "0:1:1", + "0:1:2", + "0:1:3", + "0:1:4", + "0:1:5", + "1:0:0", + "1:0:1", + "1:1:0", + "1:1:1", + "1:1:2", + "1:2:0", + "1:2:1", + "1:2:2", + "1:2:3", + ], + ) + self.assertListEqual( + [e.get_smile() for c in list_compounds for e in c.get_enumerations()], + ["abc", "def", "kk", "abc", "xyz"], + ) + + def test_merging_by_name_compound_enumeration_id(self): + conf = { + _SBE.INPUT_MERGE_COMPOUNDS: True, + _SBE.INPUT_MERGE_COMPOUNDS_BY: _SBE.INPUT_MERGE_BY_NAME, + _SBE.INPUT_MERGE_ENUMERATIONS: True, + _SBE.INPUT_MERGE_ENUMERATIONS_BY: _SBE.INPUT_MERGE_BY_ID, + } + conf = StepMerge(**conf) + merger = InputMerger(conf) + list_compounds = merger.merge(self.list_compounds) + + self.assertEqual(len(list_compounds), 2) + self.assertEqual(len(list_compounds[0].get_enumerations()), 3) + self.assertEqual(len(list_compounds[1].get_enumerations()), 3) + + self.assertListEqual( + [c.get_name() for c in list_compounds], + ["test_molecule", "test_molecule_new"], + ) + self.assertListEqual( + [ + conf.get_index_string() + for c in list_compounds + for e in c.get_enumerations() + for conf in e.get_conformers() + ], + [ + "0:0:0", + "0:0:1", + "0:0:2", + "0:0:3", + "0:0:4", + "0:1:0", + "0:1:1", + "0:1:2", + "0:2:0", + "0:2:1", + "1:0:0", + "1:0:1", + "1:1:0", + "1:1:1", + "1:1:2", + "1:2:0", + "1:2:1", + "1:2:2", + "1:2:3", + ], + ) + self.assertListEqual( + [e.get_smile() for c in list_compounds for e in c.get_enumerations()], + ["abc", "def", "abc", "kk", "abc", "xyz"], + ) diff --git a/tests/step_utils/test_input_preparator.py b/tests/step_utils/test_input_preparator.py new file mode 100644 index 0000000..18c243c --- /dev/null +++ b/tests/step_utils/test_input_preparator.py @@ -0,0 +1,101 @@ +import os +import unittest +from icolos.core.composite_agents.workflow import WorkFlow +from icolos.core.step_utils.input_preparator import ( + InputPreparator, + StepInputParameters, + StepInputSource, +) +from icolos.core.containers.compound import Conformer, Enumeration, Compound +from 
icolos.core.workflow_steps.step import StepBase +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.general.files_paths import attach_root_path +from tests.tests_paths import PATHS_EXAMPLEDATA + +_SBE = StepBaseEnum + + +class Test_InputPreparator(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/InputPreparator") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + # comp1 has 2 enumerations, one with 2 and one with 3 conformers + comp1 = Compound(name="test_molecule", compound_number=0) + comp1_enum1 = Enumeration(smile="abc", molecule=None, enumeration_id=1) + comp1_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True) + comp1_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True) + comp1_enum2 = Enumeration(smile="def", molecule=None, enumeration_id=2) + comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True) + comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True) + comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True) + comp1.add_enumeration(comp1_enum1, auto_update=False) + comp1.add_enumeration(comp1_enum2, auto_update=False) + + source1 = StepInputSource( + source="mol1:cccccc1", + source_type=_SBE.INPUT_SOURCE_TYPE_STRING, + source_field="new_string", + ) + source2 = StepInputSource( + source="prev_step", source_type=_SBE.INPUT_SOURCE_TYPE_STEP + ) + source3 = StepInputSource( + source="mock_step", + source_type=_SBE.INPUT_SOURCE_TYPE_STEP, + source_field="old_input_field", + target_field="new_input_field", + ) + source4 = StepInputSource( + source="mol2:cccc1", source_type=_SBE.INPUT_SOURCE_TYPE_STRING + ) + source5 = StepInputSource( + source=attach_root_path(PATHS_EXAMPLEDATA.PARACETAMOL_COSMO), + source_type=_SBE.INPUT_SOURCE_TYPE_PATH, + source_field="cosmo", + target_field="cosmo", + ) + source6 = StepInputSource( + source=attach_root_path(PATHS_EXAMPLEDATA.PARACETAMOL_COSMO), + source_type=_SBE.INPUT_SOURCE_TYPE_FILE, + source_field="cosmo_filepath", + target_field="cosmo_test_file", + ) + source7 = StepInputSource( + source=attach_root_path(PATHS_EXAMPLEDATA.PANTHER_NEGATIVE_IMAGE), + extension="mol2", + ) + self.params = StepInputParameters( + compounds=[source1, source4, source2], generic=[source7] + ) + blank_params = StepInputParameters(compounds=[], generic=[]) + mock_step = StepBase(step_id="mock_step", type=None, input=self.params) + prev_step = StepBase(step_id="prev_step", type=None, input=blank_params) + prev_step.data.compounds = [comp1] + + workflow = WorkFlow() + workflow.add_step(prev_step) + workflow.add_step(mock_step) + self.workflow = workflow + + @classmethod + def tearDownClass(cls): + pass + + def test_input_preparation(self): + preparator = InputPreparator(workflow=self.workflow, logger=None) + data, work_dir = preparator.generate_input( + step_input=self.params, step_type=_SBE.STEP_SHAEP + ) + self.assertEqual(len(data.compounds), 3) + self.assertEqual(len(data.generic.get_all_files()), 1) + with open(attach_root_path(PATHS_EXAMPLEDATA.PANTHER_NEGATIVE_IMAGE), "r") as f: + file = f.read() + self.assertEqual( + data.generic.get_file_by_name("panther_test_output.mol2").get_data(), file + ) + self.assertEqual(len(data.compounds[1]), 1) + self.assertEqual((len(data.compounds[2][1])), 3) diff --git a/tests/step_utils/test_run_variables_resolver.py b/tests/step_utils/test_run_variables_resolver.py new file mode 100644 index 0000000..4ceca8c --- /dev/null +++ 
b/tests/step_utils/test_run_variables_resolver.py
@@ -0,0 +1,164 @@
+import unittest
+
+from icolos.core.containers.compound import Conformer, Enumeration, Compound
+from icolos.core.step_utils.run_variables_resolver import RunVariablesResolver
+from icolos.utils.enums.step_enums import StepBaseEnum
+
+_SBE = StepBaseEnum
+
+
+class Test_RunVariablesResolver(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.resolver = RunVariablesResolver()
+
+    def setUp(self):
+        # comp1 has 2 enumerations, one with 2 and one with 3 conformers
+        comp1 = Compound(name="test_molecule", compound_number=0)
+        comp1_enum1 = Enumeration(
+            smile="abc", molecule=None, enumeration_id=1, compound_object=comp1
+        )
+        comp1_enum1.add_conformer(
+            Conformer(conformer_id=0, enumeration_object=comp1_enum1), auto_update=True
+        )
+        comp1_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp1_enum2 = Enumeration(smile="def", molecule=None, enumeration_id=2)
+        comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp1_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp1.add_enumeration(comp1_enum1, auto_update=False)
+        comp1.add_enumeration(comp1_enum2, auto_update=False)
+
+        # comp2 has 3 enumerations, one with 2, one with 3 and one with 4 conformers
+        comp2 = Compound(name="test_molecule_new", compound_number=0)
+        comp2_enum1 = Enumeration(smile="kk", molecule=None, enumeration_id=0)
+        comp2_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum2 = Enumeration(smile="abc", molecule=None, enumeration_id=1)
+        comp2_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum2.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum3 = Enumeration(smile="xyz", molecule=None, enumeration_id=2)
+        comp2_enum3.add_conformer(
+            Conformer(conformer_id=0, enumeration_object=comp2_enum3), auto_update=True
+        )
+        comp2_enum3.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum3.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2_enum3.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp2.add_enumeration(comp2_enum1, auto_update=False)
+        comp2.add_enumeration(comp2_enum2, auto_update=False)
+        comp2.add_enumeration(comp2_enum3, auto_update=False)
+
+        # comp3 has 2 enumerations, one with 2 and one with 3 conformers
+        # (same name as comp1, but a different compound number)
+        comp3 = Compound(name="test_molecule", compound_number=1)
+        comp3_enum1 = Enumeration(smile="abc", molecule=None, enumeration_id=0)
+        comp3_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp3_enum1.add_conformer(Conformer(conformer_id=0), auto_update=True)
+        comp3_enum2 = Enumeration(
+            smile="def", molecule=None, enumeration_id=1, compound_object=comp3
+        )
+        comp3_enum2.add_conformer(Conformer(conformer_id=0), auto_update=False)
+        comp3_enum2.add_conformer(Conformer(conformer_id=0), auto_update=False)
+        comp3_enum2.add_conformer(Conformer(conformer_id=0), auto_update=False)
+        comp3.add_enumeration(comp3_enum1, auto_update=False)
+        comp3.add_enumeration(comp3_enum2, auto_update=False)
+        self.list_compounds = [comp1, comp2, comp3]
+
+    @classmethod
+    def tearDownClass(cls):
+        pass
+
+    def test_compound_replacements(self):
+        inp =
"/a/path/to/nowhere/[compound_id]/[compound_id]/compound_id/whatever/[compound_name]" + self.assertEqual( + self.resolver.resolve_compound_level(inp, self.list_compounds[0]), + "/a/path/to/nowhere/0/0/compound_id/whatever/test_molecule", + ) + self.assertEqual( + self.resolver.resolve_compound_level(inp, self.list_compounds[1]), + "/a/path/to/nowhere/0/0/compound_id/whatever/test_molecule_new", + ) + self.assertEqual( + self.resolver.resolve_compound_level(inp, self.list_compounds[2]), + "/a/path/to/nowhere/1/1/compound_id/whatever/test_molecule", + ) + + # test what happens, when no replacement is done + inp = "/a/string/withouttreplacement" + self.assertEqual( + self.resolver.resolve_compound_level(inp, self.list_compounds[0]), inp + ) + + def test_enumeration_replacements(self): + inp = "/a/path/to/nowhere/[compound_id]/[enumeration_id]/[enumeration_string]/whatever/[enumeration_id]" + self.assertEqual( + self.resolver.resolve_enumeration_level(inp, self.list_compounds[0][0]), + "/a/path/to/nowhere/[compound_id]/1/0:1/whatever/1", + ) + self.assertEqual( + self.resolver.resolve_enumeration_level(inp, self.list_compounds[0][1]), + "/a/path/to/nowhere/[compound_id]/2/:2/whatever/2", + ) + self.assertEqual( + self.resolver.resolve_enumeration_level(inp, self.list_compounds[2][1]), + "/a/path/to/nowhere/[compound_id]/1/1:1/whatever/1", + ) + + # test what happens, when no replacement is done + inp = "/a/string/withouttreplacement" + self.assertEqual( + self.resolver.resolve_enumeration_level(inp, self.list_compounds[0][0]), inp + ) + + def test_conformer_replacements(self): + inp = "/a/path/[conformer_string]to/nowhere/[compound_id]/[conformer_id]/[enumeration_string]/whatever/[conformer_id]" + self.assertEqual( + self.resolver.resolve_conformer_level(inp, self.list_compounds[0][0][0]), + "/a/path/0:1:0to/nowhere/[compound_id]/0/[enumeration_string]/whatever/0", + ) + self.assertEqual( + self.resolver.resolve_conformer_level(inp, self.list_compounds[0][0][1]), + "/a/path/0:1:1to/nowhere/[compound_id]/1/[enumeration_string]/whatever/1", + ) + self.assertEqual( + self.resolver.resolve_conformer_level(inp, self.list_compounds[2][0][1]), + "/a/path/:0:1to/nowhere/[compound_id]/1/[enumeration_string]/whatever/1", + ) + self.assertEqual( + self.resolver.resolve_conformer_level(inp, self.list_compounds[1][2][0]), + "/a/path/:2:0to/nowhere/[compound_id]/0/[enumeration_string]/whatever/0", + ) + + # test what happens, when no replacement is done + inp = "/a/string/withouttreplacement" + self.assertEqual( + self.resolver.resolve_conformer_level(inp, self.list_compounds[0][0][0]), + inp, + ) + + def test_resolve(self): + inp = "/a/path/[conformer_string]to/nowhere/[compound_id]/[conformer_id]/[enumeration_string]/whatever/[compound_name]" + self.assertEqual( + self.resolver.resolve(inp, self.list_compounds[0][0][0]), + "/a/path/0:1:0to/nowhere/0/0/0:1/whatever/test_molecule", + ) + self.assertEqual( + self.resolver.resolve(inp, self.list_compounds[0][0]), + "/a/path/[conformer_string]to/nowhere/0/[conformer_id]/0:1/whatever/test_molecule", + ) + self.assertEqual( + self.resolver.resolve(inp, self.list_compounds[0]), + "/a/path/[conformer_string]to/nowhere/0/[conformer_id]/[enumeration_string]/whatever/test_molecule", + ) + + # fails for cases where the linking conformer -> enumeration -> compound is not established + try: + self.resolver.resolve(inp, self.list_compounds[2][0][1]) + except Exception as e: + self.assertEqual( + e.__str__(), "'NoneType' object has no attribute 'get_compound_number'" + ) 
+ + # test what happens, when no replacement is done + inp = "/a/string/withouttreplacement" + self.assertEqual(self.resolver.resolve(inp, self.list_compounds[0][0][0]), inp) diff --git a/tests/step_utils/test_structconvert.py b/tests/step_utils/test_structconvert.py new file mode 100644 index 0000000..b11a601 --- /dev/null +++ b/tests/step_utils/test_structconvert.py @@ -0,0 +1,32 @@ +import os +import unittest +from icolos.core.step_utils.structconvert import StructConvert +from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.general.files_paths import attach_root_path, remove_folder +from tests.tests_paths import PATHS_EXAMPLEDATA + +_SBE = StepBaseEnum +_SEE = SchrodingerExecutablesEnum() + + +class Test_Structconvert(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/structconvert") + remove_folder(cls._test_dir) + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + pass + + def test_sdf2pdb(self): + executor = StructConvert(prefix_execution=_SEE.SCHRODINGER_MODULE) + output_path = os.path.join(self._test_dir, "output_small_molecule.pdb") + executor.sdf2pdb( + sdf_file=PATHS_EXAMPLEDATA.SMALL_MOLECULE_SDF_PATH, pdb_file=output_path + ) + + stat_inf = os.stat(output_path) + self.assertEqual(stat_inf.st_size, 2209) diff --git a/tests/step_utils/test_writeout.py b/tests/step_utils/test_writeout.py new file mode 100644 index 0000000..e5bac42 --- /dev/null +++ b/tests/step_utils/test_writeout.py @@ -0,0 +1,335 @@ +import os +import unittest +from icolos.core.containers.generic import GenericContainer, GenericData +from icolos.core.containers.compound import Compound, Enumeration +from icolos.core.step_utils.input_preparator import StepData +from icolos.core.step_utils.step_writeout import WriteOutHandler +from icolos.utils.enums.step_enums import StepBaseEnum +from icolos.utils.general.files_paths import attach_root_path, remove_folder +from tests.tests_paths import PATHS_EXAMPLEDATA, get_mol_as_Conformer +import shutil + +_SBE = StepBaseEnum + + +class Test_WriteOut(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/write-out") + remove_folder(cls._test_dir) + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + comp = Compound(compound_number=1, name="paracetamol") + enum_mol = get_mol_as_Conformer(PATHS_EXAMPLEDATA.PARACETAMOL_PATH)[ + 0 + ].get_molecule() + comp.add_enumeration(Enumeration(molecule=enum_mol), auto_update=True) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + comp[0].add_conformers(conformers, auto_update=True) + self.compound = comp + + comp2 = Compound(compound_number=2) + comp2.add_enumeration(Enumeration(molecule=enum_mol), auto_update=True) + conformers = get_mol_as_Conformer(PATHS_EXAMPLEDATA.CLUSTERING_11CONFS) + comp2[0].add_conformers(conformers, auto_update=True) + self.compound2 = comp2 + + @classmethod + def tearDownClass(cls): + pass + + def test_conformer_writeout_merged(self): + conf = { + _SBE.WRITEOUT_CONFIG: { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_SDF, + }, + } + } + writeout_handler = WriteOutHandler(**conf) + 
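+        # typical write-out flow (sketch of the pattern used throughout this file):
+        #     handler = WriteOutHandler(**conf)     # validate the config dict
+        #     handler.set_data(StepData(...))       # attach compounds/generic data
+        #     handler.config.destination.resource = "/path/to/out.sdf"
+        #     handler.write()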
writeout_handler.set_data(StepData(compounds=[self.compound, self.compound2])) + + writeout_handler.config.destination.resource = os.path.join( + self._test_dir, "both_compounds.sdf" + ) + writeout_handler.write() + stat_inf = os.stat(os.path.join(self._test_dir, "both_compounds.sdf")) + self.assertGreater(stat_inf.st_size, 39000) + + def test_conformer_writeout_split(self): + conf = { + _SBE.WRITEOUT_CONFIG: { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_SDF, + _SBE.WRITEOUT_DESTINATION_MERGE: False, + }, + } + } + writeout_handler = WriteOutHandler(**conf) + writeout_handler.set_data(StepData(compounds=[self.compound, self.compound2])) + + writeout_handler.config.destination.resource = os.path.join( + self._test_dir, "[compound_id]_split.sdf" + ) + writeout_handler.write() + stat_inf = os.stat(os.path.join(self._test_dir, "1_split.sdf")) + self.assertGreater(stat_inf.st_size, 19900) + stat_inf = os.stat(os.path.join(self._test_dir, "2_split.sdf")) + self.assertGreater(stat_inf.st_size, 19500) + + def test_extradata_writeout(self): + conf = { + _SBE.WRITEOUT_CONFIG: { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_EXTRADATA, + _SBE.WRITEOUT_COMP_KEY: "testdata", + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_TXT, + }, + } + } + self.compound[0][0].add_extra_data("testdata", ["this\n", "is\n", "a\ntest"]) + self.compound[0][1].add_extra_data( + "testdata", "YETANOTHERTEST\nthis\nis\na\ntest" + ) + self.compound[0]._conformers = [self.compound[0][0], self.compound[0][1]] + + writeout_handler = WriteOutHandler(**conf) + writeout_handler.set_data(StepData(compounds=[self.compound])) + + # generate two files + writeout_handler.config.destination.resource = os.path.join( + self._test_dir, "extra_writeout/[conformer_id].txt" + ) + writeout_handler.write() + stat_inf = os.stat(os.path.join(self._test_dir, "extra_writeout/0.txt")) + self.assertEqual(stat_inf.st_size, 15) + stat_inf = os.stat(os.path.join(self._test_dir, "extra_writeout/1.txt")) + self.assertEqual(stat_inf.st_size, 29) + + def test_tabular_writeout(self): + config = { + _SBE.WRITEOUT_CONFIG: { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_CSV, + }, + } + } + for idx, conf in enumerate(self.compound[0].get_conformers()): + conf.get_molecule().SetProp("Gsolv_whatever", str(idx)) + self.compound[0][3].get_molecule().SetProp("another_prop", "bbc") + writeout_handler = WriteOutHandler(**config) + writeout_handler.set_data(StepData(compounds=[self.compound])) + + # write-out without selecting any tags + writeout_handler.config.destination.resource = os.path.join( + self._test_dir, "tabular_notagsselected_[conformer_id].csv" + ) + writeout_handler.write() + stat_inf = os.stat(os.path.join(self._test_dir, "tabular_notagsselected_0.csv")) + self.assertGreater(stat_inf.st_size, 250) + + # write-out with selecting tags + writeout_handler.config.destination.resource = os.path.join( + self._test_dir, "tabular_tagsselected_[conformer_id].csv" + ) + writeout_handler.config.compounds.selected_tags = [ + 
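+            # "Gsolv_dmso" below is never set on these conformers; missing tags
+            # are presumably written out as empty fields rather than raising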
"Gsolv_whatever", + "Gsolv_dmso", + "another_prop", + ] + writeout_handler.write() + stat_inf = os.stat(os.path.join(self._test_dir, "tabular_tagsselected_0.csv")) + self.assertGreater(stat_inf.st_size, 300) + + def test_tabular_writeout_aggregate(self): + config = { + _SBE.WRITEOUT_CONFIG: { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE, + _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_CSV, + }, + } + } + for idx, conf in enumerate(self.compound[0].get_conformers()): + conf.get_molecule().SetProp("Gsolv_whatever", str(idx)) + self.compound[0][3].get_molecule().SetProp("another_prop", "bbc") + writeout_handler = WriteOutHandler(**config) + writeout_handler.set_data(StepData(compounds=[self.compound])) + + # write-out without selecting tags and using compound-level aggregation + writeout_handler.config.destination.resource = os.path.join( + self._test_dir, "tabular_notagsselected_[conformer_id]_compagg.csv" + ) + writeout_handler.config.compounds.aggregation.mode = ( + _SBE.WRITEOUT_COMP_AGGREGATION_MODE_BESTPERCOMPOUND + ) + writeout_handler.config.compounds.selected_tags = ["Gsolv_dmso"] + writeout_handler.config.compounds.aggregation.key = "Gsolv_dmso" + writeout_handler.write() + stat_inf = os.stat( + os.path.join(self._test_dir, "tabular_notagsselected_6_compagg.csv") + ) + self.assertEqual(stat_inf.st_size, 56) + + # write-out without selecting tags and using compound-level aggregation (reverse) + writeout_handler.config.destination.resource = os.path.join( + self._test_dir, "tabular_notagsselected_[conformer_id]_compagg.csv" + ) + writeout_handler.config.compounds.aggregation.mode = ( + _SBE.WRITEOUT_COMP_AGGREGATION_MODE_BESTPERCOMPOUND + ) + writeout_handler.config.compounds.selected_tags = ["Gsolv_dmso"] + writeout_handler.config.compounds.aggregation.key = "Gsolv_dmso" + writeout_handler.config.compounds.aggregation.highest_is_best = False + writeout_handler.write() + stat_inf = os.stat( + os.path.join(self._test_dir, "tabular_notagsselected_7_compagg.csv") + ) + self.assertEqual(stat_inf.st_size, 56) + + def test_reinvent_writeout_empty(self): + config = { + _SBE.WRITEOUT_CONFIG: { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_REINVENT, + _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_JSON, + _SBE.WRITEOUT_DESTINATION_RESOURCE: os.path.join( + self._test_dir, "reinvent_empty.json" + ), + }, + } + } + for idx, conf in enumerate(self.compound[0].get_conformers()): + conf.get_molecule().SetProp("Gsolv_whatever", str(idx)) + self.compound[0].clear_conformers() + writeout_handler = WriteOutHandler(**config) + writeout_handler.set_data(StepData(compounds=[self.compound])) + + writeout_handler.config.compounds.selected_tags = [ + "conformer_energy", + "G_octanol", + ] + + # write-out to console (REINVENT style) + writeout_handler.write() + + # write-out to file + out_path = os.path.join(self._test_dir, "reinvent_empty.json") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 300) + + def test_reinvent_writeout_merged(self): + config = { + _SBE.WRITEOUT_CONFIG: { + _SBE.WRITEOUT_COMP: { + _SBE.WRITEOUT_COMP_CATEGORY: _SBE.WRITEOUT_COMP_CATEGORY_CONFORMERS + }, + _SBE.WRITEOUT_DESTINATION: { + _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_REINVENT, + 
_SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_JSON,
+                    _SBE.WRITEOUT_DESTINATION_RESOURCE: os.path.join(
+                        self._test_dir, "reinvent.json"
+                    ),
+                },
+            }
+        }
+        for idx, conf in enumerate(self.compound[0].get_conformers()):
+            conf.get_molecule().SetProp("Gsolv_whatever", str(idx))
+        self.compound[0][3].get_molecule().SetProp("another_prop", "bbc")
+        writeout_handler = WriteOutHandler(**config)
+        writeout_handler.set_data(StepData(compounds=[self.compound]))
+
+        writeout_handler.config.compounds.selected_tags = [
+            "conformer_energy",
+            "G_octanol",
+        ]
+
+        # write-out to console (REINVENT style)
+        writeout_handler.write()
+
+        # write-out to file
+        out_path = os.path.join(self._test_dir, "reinvent.json")
+        stat_inf = os.stat(out_path)
+        self.assertGreater(stat_inf.st_size, 300)
+
+    def test_generic_writeout(self):
+        conf = {
+            _SBE.WRITEOUT_CONFIG: {
+                _SBE.WRITEOUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "txt"},
+                _SBE.WRITEOUT_DESTINATION: {
+                    _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE,
+                    _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_TXT,
+                },
+            }
+        }
+        gc = GenericContainer()
+        gc.add_file(
+            GenericData(
+                file_name="anothertest.txt",
+                file_data="YETANOTHERTEST\nthis\nis\na\ntest",
+            )
+        )
+        writeout_handler = WriteOutHandler(**conf)
+        writeout_handler.set_data(StepData(generic=gc))
+
+        # write out the file; an index suffix gets appended to the name
+        out_path = os.path.join(self._test_dir, "anothertest.txt")
+        writeout_handler.config.destination.resource = out_path
+        writeout_handler.write()
+        out_path = os.path.join(self._test_dir, "anothertest_0.txt")
+        stat_inf = os.stat(out_path)
+        self.assertEqual(stat_inf.st_size, 29)
+
+    def test_generic_writeout_path(self):
+        conf = {
+            _SBE.WRITEOUT_CONFIG: {
+                _SBE.WRITEOUT_GENERIC: {_SBE.WRITEOUT_GENERIC_KEY: "xtc"},
+                _SBE.WRITEOUT_DESTINATION: {
+                    _SBE.WRITEOUT_DESTINATION_TYPE: _SBE.WRITEOUT_DESTINATION_TYPE_FILE,
+                    _SBE.WRITEOUT_DESTINATION_FORMAT: _SBE.FORMAT_TXT,
+                },
+            }
+        }
+
+        writeout_handler = WriteOutHandler(**conf)
+        # simulate the data being a path to a large file on disk
+        gc = GenericContainer()
+        gc.add_file(
+            GenericData(
+                file_name="md_0_1.xtc", file_data=PATHS_EXAMPLEDATA.GROMACS_PDB_FILE
+            )
+        )
+        writeout_handler.set_data(StepData(generic=gc))
+        out_path = os.path.join(self._test_dir, "md_0_1.xtc")
+        writeout_handler.config.destination.resource = out_path
+        out_path = os.path.join(self._test_dir, "md_0_1_0.xtc")
+        writeout_handler.write()
+
+        # restore the file, since by default it is removed from the source location
+        if not os.path.isfile(PATHS_EXAMPLEDATA.GROMACS_PDB_FILE):
+            shutil.copyfile(out_path, PATHS_EXAMPLEDATA.GROMACS_PDB_FILE)
+        stat_inf = os.stat(out_path)
+        self.assertEqual(stat_inf.st_size, 53635)
diff --git a/tests/structure_prediction/__init__.py b/tests/structure_prediction/__init__.py
new file mode 100644
index 0000000..a8c2721
--- /dev/null
+++ b/tests/structure_prediction/__init__.py
@@ -0,0 +1,5 @@
+from tests.structure_prediction.test_peptide_embedder import *
+from tests.structure_prediction.test_pdb_fixer import *
+
+# from tests.structure_prediction.test_dssp import *
+# TODO: work out why the dssp unit test hangs sometimes
diff --git a/tests/structure_prediction/test_dssp.py b/tests/structure_prediction/test_dssp.py
new file mode 100644
index 0000000..e59afca
--- /dev/null
+++ b/tests/structure_prediction/test_dssp.py
@@ -0,0 +1,52 @@
+from icolos.core.containers.generic import GenericData
+import unittest
+from icolos.core.workflow_steps.structure_prediction.dssp import StepDSSP
+from
icolos.utils.general.files_paths import attach_root_path +import os +from tests.tests_paths import PATHS_EXAMPLEDATA +from icolos.utils.enums.step_enums import StepBaseEnum + +_SBE = StepBaseEnum + + +class TestDSSP(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/structure_prediction") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.DSSP_PDB_1), "r") as f: + self.pdb1 = f.read() + + with open(attach_root_path(PATHS_EXAMPLEDATA.DSSP_PDB_2), "r") as f: + self.pdb2 = f.read() + + with open(attach_root_path(PATHS_EXAMPLEDATA.DSSP_PDB_3), "r") as f: + self.pdb3 = f.read() + + def test_dssp(self): + step_conf = { + _SBE.STEPID: "01_DSSP", + _SBE.STEP_TYPE: _SBE.STEP_DSSP, + _SBE.EXEC: {_SBE.EXEC_PREFIXEXECUTION: "module load DSSP"}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: {"--output-format": "dssp"} + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + } + + step_dssp = StepDSSP(**step_conf) + pdb1 = GenericData(file_name="test_1.pdb", file_data=self.pdb1) + pdb2 = GenericData(file_name="test_2.pdb", file_data=self.pdb2) + pdb3 = GenericData(file_name="test_3.pdb", file_data=self.pdb3) + step_dssp.data.generic.add_files([pdb1, pdb2, pdb3]) + step_dssp.execute() + + out_path = os.path.join(self._test_dir, "dssp_output_test_1.txt") + step_dssp.write_generic_by_name(self._test_dir, "dssp_output_test_1.txt") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 1234) diff --git a/tests/structure_prediction/test_pdb_fixer.py b/tests/structure_prediction/test_pdb_fixer.py new file mode 100644 index 0000000..9c88b3a --- /dev/null +++ b/tests/structure_prediction/test_pdb_fixer.py @@ -0,0 +1,63 @@ +from icolos.core.containers.generic import GenericData +import unittest +from icolos.core.workflow_steps.structure_prediction.pdb_fixer import StepPdbFixer +from icolos.utils.general.files_paths import attach_root_path +import os +from tests.tests_paths import PATHS_EXAMPLEDATA +from icolos.utils.enums.step_enums import StepBaseEnum + +_SBE = StepBaseEnum + + +class TestPdbFixer(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/structure_prediction") + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.PANTHER_RECEPTOR_PDB), "r") as f: + self.pdb = f.read() + + def test_pdb_fixer_default(self): + step_conf = { + _SBE.STEPID: "01_PDB_FIXER", + _SBE.STEP_TYPE: _SBE.STEP_PDB_FIXER, + _SBE.EXEC: {}, + _SBE.SETTINGS: {_SBE.SETTINGS_ARGUMENTS: {}, _SBE.SETTINGS_ADDITIONAL: {}}, + } + step_pdb_fixer = StepPdbFixer(**step_conf) + test_pdb = GenericData(file_name="test.pdb", file_data=self.pdb) + step_pdb_fixer.data.generic.add_file(test_pdb) + step_pdb_fixer.execute() + + out_path = os.path.join(self._test_dir, "test.pdb") + step_pdb_fixer.write_generic_by_extension(path=self._test_dir, ext="pdb") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 738000) + + def test_pdb_fixer(self): + step_conf = { + _SBE.STEPID: "01_PDB_FIXER", + _SBE.STEP_TYPE: _SBE.STEP_PDB_FIXER, + _SBE.EXEC: {}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "--keep-heterogens": "water", + "--ph": "4.0", + }, + } + }, + } + step_pdb_fixer = StepPdbFixer(**step_conf) + test_pdb = GenericData(file_name="test_2.pdb", 
file_data=self.pdb) + step_pdb_fixer.data.generic.add_file(test_pdb) + step_pdb_fixer.execute() + + out_path = os.path.join(self._test_dir, "test_2.pdb") + step_pdb_fixer.write_generic_by_extension(path=self._test_dir, ext="pdb") + stat_inf = os.stat(out_path) + self.assertGreater(stat_inf.st_size, 710000) diff --git a/tests/structure_prediction/test_peptide_embedder.py b/tests/structure_prediction/test_peptide_embedder.py new file mode 100644 index 0000000..de47707 --- /dev/null +++ b/tests/structure_prediction/test_peptide_embedder.py @@ -0,0 +1,51 @@ +from icolos.core.containers.generic import GenericData +import unittest +from icolos.core.workflow_steps.structure_prediction.peptide_embedder import ( + StepPeptideEmbedder, +) +from icolos.utils.general.files_paths import attach_root_path +import os +from tests.tests_paths import PATHS_EXAMPLEDATA +from icolos.utils.enums.step_enums import StepBaseEnum + +_SBE = StepBaseEnum + + +class TestPeptideEmbedder(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._test_dir = attach_root_path("tests/junk/structure_prediction") + + if not os.path.isdir(cls._test_dir): + os.makedirs(cls._test_dir) + + def setUp(self): + with open(attach_root_path(PATHS_EXAMPLEDATA.TEST_FASTA_FILE), "r") as f: + self.fasta = f.read() + + def test_peptide_embedder(self): + step_conf = { + _SBE.STEPID: "01_peptide_embedder", + _SBE.STEP_TYPE: _SBE.STEP_PEPTIDE_EMBEDDER, + _SBE.EXEC: {}, + _SBE.SETTINGS: { + _SBE.SETTINGS_ARGUMENTS: { + _SBE.SETTINGS_ARGUMENTS_FLAGS: [], + _SBE.SETTINGS_ARGUMENTS_PARAMETERS: { + "-prime_opt": "OPLS_VERSION=OPLS3e", + "-HOST": "cpu-only", + }, + }, + _SBE.SETTINGS_ADDITIONAL: {}, + }, + } + + step_embedder = StepPeptideEmbedder(**step_conf) + fasta_obj = GenericData(file_name="test_seq.fasta", file_data=self.fasta) + step_embedder.data.generic.add_file(fasta_obj) + step_embedder.execute() + + out_path = os.path.join(self._test_dir, "sequence_0.pdb") + step_embedder.write_generic_by_extension(path=self._test_dir, ext="pdb") + stat_inf = os.stat(out_path) + self.assertEqual(stat_inf.st_size, 17504) diff --git a/tests/tests_paths.py b/tests/tests_paths.py new file mode 100644 index 0000000..5c25ef1 --- /dev/null +++ b/tests/tests_paths.py @@ -0,0 +1,396 @@ +from icolos.utils.enums.program_parameters import PantherEnum +from icolos.core.containers.generic import GenericData +import json +import os +from typing import List, Dict +from icolos.core.containers.compound import Compound, Enumeration, Conformer +from icolos.utils.general.files_paths import attach_root_path +from icolos.utils.smiles import to_smiles +from rdkit import Chem +from icolos.utils.enums.write_out_enums import WriteOutEnum +from shutil import copytree, rmtree + +_PE = PantherEnum() +_WE = WriteOutEnum() + +# load the instantiated "config.json", holding the license key for OpenEye for example +try: + with open( + attach_root_path("icolos/config/unit_tests_config/config.json"), "r" + ) as f: + MAIN_CONFIG = json.load(f) +except: + MAIN_CONFIG = {} + + +def expand_path(path: str) -> str: + return os.path.join(MAIN_CONFIG["ICOLOS_TEST_DATA"], path) + + +def create_test_dir(source: str, dest: str) -> None: + try: + if os.path.isdir(dest): + # remove the existing directory structure before calling copytree or it will complain + rmtree(dest) + copytree(source, dest) + except Exception as e: + os.makedirs(dest) + + +def export_unit_test_env_vars(): + # make sure "PATH" is executed last to expand upwards variables + for key in MAIN_CONFIG.keys(): + if key != "PATH": + 
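+            # e.g. (hypothetical values) {"SCHRODINGER": "/opt/schrodinger",
+            # "PATH": "$SCHRODINGER:$PATH"}: exporting SCHRODINGER first lets
+            # the deferred "PATH" expansion below pick it up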
if isinstance(MAIN_CONFIG[key], str): + os.environ[str(key)] = os.path.expandvars(MAIN_CONFIG[key]) + # iterate through nested dicts + elif isinstance(MAIN_CONFIG[key], dict): + for k in MAIN_CONFIG[key].keys(): + os.environ[str(k)] = os.path.expandvars(MAIN_CONFIG[key][k]) + if "PATH" in MAIN_CONFIG.keys(): + os.environ["PATH"] = os.path.expandvars(MAIN_CONFIG["PATH"]) + + +class PATHS_1UYD: + + GRID_PATH = expand_path("Glide/1UYD_grid_no_constraints.zip") + GRID_CONSTRAINTS_PATH = expand_path("Glide/1UYD_grid_constraints.zip") + PDBQT_PATH = expand_path("AutoDockVina/1UYD_fixed.pdbqt") + PDB_PATH = expand_path("molecules/1UYD/1UYD_apo.pdb") + LIGANDS = expand_path("molecules/1UYD/1UYD_ligands.sdf") + NATIVE_LIGAND_SDF = expand_path("molecules/1UYD/PU8_native_ligand.sdf") + NATIVE_LIGAND_PDB = expand_path("molecules/1UYD/PU8_native_ligand.pdb") + LIG4_POSES = expand_path("fep_plus/1uyd_lig4.sdf") + XRAY_STRUCTURES = expand_path("fep_plus/xray_structures") + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +class PATHS_EXAMPLEDATA: + + ASPIRIN_SMI_PATH = expand_path("molecules/aspirin.smi") + PARACETAMOL_SMI_PATH = expand_path("molecules/paracetamol.smi") + ASPIRIN_PATH = expand_path("molecules/aspirin.sdf") + PARACETAMOL_PATH = expand_path("molecules/paracetamol.sdf") + SMALL_MOLECULES_SMI_PATH = expand_path("molecules/small_molecules.smi") + SMALL_MOLECULES_CSV_PATH = expand_path("molecules/small_molecules.csv") + SMALL_MOLECULES_CSV_PATH_DELIMITER_SEMICOLON = expand_path( + "molecules/small_molecules_semicolon.csv" + ) + MEDIUM_MOLECULES_SMI_PATH = expand_path("molecules/medium_molecules.smi") + SMALL_MOLECULES_SDF_PATH = expand_path("molecules/small_molecules.sdf") + SMALL_MOLECULE_SDF_PATH = expand_path("molecules/small_molecule.sdf") + SMALL_MOLECULES_JSON_PATH = expand_path("reinvent/small_input.json") + MEDIUM_MOLECULES_SDF_PATH = expand_path("molecules/medium_molecules.sdf") + PARACETAMOL_MULTIPLE_CONF = expand_path("molecules/paracetamol_multiple.sdf") + PARACETAMOL_COSMO = expand_path("Turbomole/paracetamol.cosmo") + PARACETAMOL_COSMO_OUTPUT = expand_path("cosmo/cosmotherm.out") + EPSA_MODEL_PATH = expand_path("models/ePSA_example.pkl") + EPSA_BOLTZMANN_WEIGHTING_EXAMPLE_MOLECULE = expand_path( + "models/ePSA_Boltzmann_weighting.sdf" + ) + GLIDE_EXAMPLE_IN = expand_path("Glide/example.in") + EPSA_EXAMPLE_MOLECULE = expand_path("models/ePSA_example_mol.sdf") + PRIME_RECEPTOR_COX2 = expand_path("prime/cox2_receptor.mae") + PRIME_COX2_GRID = expand_path("molecules/1CX2/1cx2_GridGen.zip") + PRIME_DOCKED_LIGAND_SDF = expand_path("prime/docked_ligand.sdf") + CLUSTERING_11CONFS = expand_path("clustering/paracetamol_11_conformers.sdf") + PANTHER_CONFIG = expand_path("panther/default_panther.in") + PANTHER_RECEPTOR_PDB = expand_path("panther/COX2_A.pdb") + PANTHER_NEGATIVE_IMAGE = expand_path("panther/panther_test_output.mol2") + SHAEP_LIGAND_DOCKED_POSE = expand_path("panther/cox2_ligand_bound.sdf") + + GROMACS_STRUCTURE_FILE = expand_path("gromacs/test_structure.gro") + GROMACS_PDB_FILE = expand_path("gromacs/test_structure.pdb") + GROMACS_NVT_MDP = expand_path("gromacs/nvt_equil.mdp") + GROMACS_NPT_MDP = expand_path("gromacs/npt_equil.mdp") + GROMACS_MINIM_MDP = expand_path("gromacs/minim.mdp") + GROMACS_IONS_MDP = expand_path("gromacs/ions.mdp") + GROMACS_MD_MDP = 
expand_path("gromacs/md.mdp") + GROMACS_TPR_FILE = expand_path("gromacs/test.tpr") + GROMACS_TOPOL_FILE = expand_path("gromacs/topol.top") + GROMACS_GROMPP_INPUT_STRUCTURE = expand_path("gromacs/grompp_input.gro") + GROMACS_XTC = expand_path("gromacs/md_0_1.xtc") + GROMACS_TPR_TRJCONV = expand_path("gromacs/md_0_1.tpr") + GROMACS_HOLO_STRUCTURE = expand_path("gromacs/protein/1BVG.pdb") + GROMACS_HOLO_STRUCTURE_GRO = expand_path("gromacs/protein/1BVG.gro") + GROMACS_DMP_LIGAND_TRJ = expand_path("gromacs/protein/DMP.xtc") + GROMACS_DMP_LIGAND_SDF = expand_path("gromacs/protein/DMP.sdf") + GROMACS_PROTEIN_FILE_BASE = expand_path("gromacs/protein") + GROMACS_GROMPP_TOPOL = expand_path("gromacs/grompp_topol.top") + GROMACS_DSSP_TPR = expand_path("gromacs/.tpr") + GROMACS_DSSP_XTC = expand_path("gromacs/.xtc") + GROMACS_TS_CLUSTERS = expand_path("gromacs/clusters_ts_example.xvg") + MMPBSA_TPR = expand_path("gromacs/protein/1BVG.tpr") + MMPBSA_XTC = expand_path("gromacs/protein/1BVG.xtc") + MMPBSA_TOP = expand_path("gromacs/protein/1BVG.top") + MMPBSA_CUSTOM_INPUT = expand_path("gromacs/test_input_mmpbsa.in") + MMPBSA_POSRE = expand_path("gromacs/protein/posre.itp") + MMPBSA_LIG_POSRE = expand_path("gromacs/protein/posre_DMP:100.itp") + MMPBSA_LIG_ITP = expand_path("gromacs/protein/DMP:100.itp") + + FEP_PLUS_DOCKING_PV = expand_path("fep_plus/set_pv.maegz") + FEP_PLUS_EXAMPLE_FMP = expand_path("fep_plus/out.fmp") + FEP_PLUS_MAP_LOG = expand_path("fep_plus/fep_mapper.log") + FEP_PLUS_MAP_LOG_MIN = expand_path("fep_plus/fep_mapper_min.log") + FEP_PLUS_MAP_LOG_SINGLE_EDGE = expand_path("fep_plus/fep_mapper_single_edge.log") + + FEP_PLUS_LIGANDS = expand_path("fep_plus/ligprep_confs.sdf") + FEP_PLUS_EXAMPLE_FMP_OUT = expand_path("fep_plus/test_out.fmp") + FEP_PLUS_MULTISIM = expand_path("fep_plus/multisim.log") + FEP_PLUS_PROTEIN = expand_path("fep_plus/.pdb") + FEP_PLUS_OTHER_PROTEIN = expand_path("fep_plus/_apo.pdb") + FEP_PLUS_MULTISIM_LONG = expand_path("fep_plus/multisim.log") + + MODEL_BUILDER_EXAMPLE_JSON = expand_path( + "model_building/OptunaAZ_example_config.json" + ) + MODEL_BUILDER_TEST_INPUT_SDF = expand_path("model_building/test_input_data.sdf") + PRIME_POSEVIEWER = expand_path("molecules/1CX2/1cx2_poseviewer_pv.maegz") + COX2_ACTIVES_DOCKED = expand_path("molecules/1CX2/docked_actives.sdf") + LIGANDS_1UYD = expand_path("prime/glide_docked.sdf") + RECEPTOR_1UYD = expand_path("molecules/1UYD/1UYDreceptor.pdb") + + CAVITY_TRJ_FOLDER = expand_path("cavity_explorer/parch_align_trj") + CAVITY_DTR_FILE = expand_path("cavity_explorer/parch_align_trj/clickme.dtr") + CAVITY_CMS_FILE = expand_path("cavity_explorer/parch_align_trj/out.cms") + MDPOCKET_XTC_FILE = expand_path("cavity_explorer/structure_out_0.xtc") + MDPOCKET_PDB_FILE = expand_path("cavity_explorer/structure_0_wet.pdb") + MDPOCKET_PDB_FILE_DRY = expand_path("cavity_explorer/structure_0.pdb") + MD_POCKET_DESMOND_TOP = expand_path("cavity_explorer/top.pdb") + + DESMOND_SETUP_PDB = expand_path("desmond/1cx2.pdb") + DESMOND_PRODUCTION_CMS = expand_path("desmond/setup.cms") + TEST_FASTA_FILE = expand_path("structure_prediction/1acw.fasta") + + LIGAND_HYBRID_TEST_DIR = expand_path("pmx/lig_hybrid_work_dir") + PREPARE_SIMULATIONS_TEST_DIR = expand_path("pmx/prepare_simulations_work_dir") + ATOM_MAPPING_TEST_DIR = expand_path("pmx/atom_mapping_work_dir") + ASSEMBLE_SYSTEMS_TEST_DIR = expand_path("pmx/assemble_systems_work_dir") + BOX_WATER_IONS_TEST_DIR = expand_path("pmx/box_water_ions_work_dir") + PREPARE_TRANSITIONS_TEST_DIR = 
expand_path("pmx/prepare_transitions_work_dir") + RUN_ANALYSIS_TEST_DIR = expand_path("pmx/analysis_test_dir") + + RUN_SIMULATIONS_TEST_DIR = expand_path("pmx/run_simulations_work_dir") + PMX_FEP_MAP_LOG_PREPARE_TRANSITIONS = expand_path( + "pmx/prepare_transitions_work_dir/fep_mapper.log" + ) + PMX_LIG1_INPUT_PDB = expand_path("pmx/input/lig_18625-1.pdb") + PMX_ABFE_INPUT_COMPLEX = expand_path("pmx/abfe/1BVG.pdb") + PMX_ABFE_INPUT_LIGAND = expand_path("pmx/abfe/az_ligand.pdb") + PMX_LIG2_INPUT_PDB = expand_path("pmx/input/lig_18626-1.pdb") + PMX_LIG1_INPUT_ITP = expand_path("pmx/input/lig_18625-1.itp") + PMX_LIG2_INPUT_ITP = expand_path("pmx/input/lig_18626-1.itp") + PMX_MAPPED_PAIRS1_DAT = expand_path("pmx/input/pairs1.dat") + PMX_MAPPED_PAIRS2_DAT = expand_path( + "pmx/input/pairs2.dat" + ) # seems to be identical to 1, but in all cases + PMX_LIG1_INPUT_MAPPED_PDB = expand_path("pmx/input/out_atommap_lig1.pdb") + PMX_MDP_FILES = expand_path("pmx/mdppath") + PMX_LIG2_INPUT_MAPPED_PDB = expand_path("pmx/input/out_atommap_lig2.pdb") + DSSP_PDB_1 = expand_path("structure_prediction/1e0n.pdb") + DSSP_PDB_2 = expand_path("structure_prediction/1jbf.pdb") + DSSP_PDB_3 = expand_path("structure_prediction/6nox.pdb") + + # try to find the internal value and return + + # try to find the internal value and return + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + # prohibit any attempt to set any values + def __setattr__(self, key, value): + raise ValueError("No changes allowed.") + + +def get_mol_as_Compound(abs_path: str, compound_number: int = 0) -> Compound: + mol_supplier = Chem.SDMolSupplier(abs_path, removeHs=False) + for mol in mol_supplier: + enum = Enumeration( + smile=to_smiles(mol), original_smile=to_smiles(mol), molecule=mol + ) + comp = Compound( + name=os.path.basename(abs_path), compound_number=compound_number + ) + comp.add_enumeration(enum, auto_update=True) + return comp + + +def get_1UYD_ligands_as_Compounds(abs_path: str) -> List[Compound]: + comp_list = [] + mol_supplier = Chem.SDMolSupplier(abs_path, removeHs=False) + for cur_id, mol in enumerate(mol_supplier): + enum = Enumeration( + smile=to_smiles(mol), original_smile=to_smiles(mol), molecule=mol + ) + comp = Compound(name=mol.GetProp("_Name"), compound_number=cur_id) + comp.add_enumeration(enum, auto_update=True) + comp_list.append(comp) + return comp_list + + +def construct_full_compound_object(source) -> List[Compound]: + def _get_existing_enumeration(comp_id, enum_id): + comp = _get_existing_compound(comp_id) + for enum in comp.get_enumerations(): + if enum.get_enumeration_id() == int(enum_id): + return enum + raise ValueError + + def _get_existing_compound(idx): + for comp in list_compounds: + if int(idx) == comp.get_compound_number(): + return comp + raise ValueError + + list_compounds = [] + for mol in Chem.SDMolSupplier(source, removeHs=False): + new_compound = False + new_enumeration = False + mol_name = mol.GetProp(_WE.RDKIT_NAME) + # assuming the mol name follows Icolos conventions + try: + id_parts = mol_name.split(":") + comp_id = id_parts[0] + enum_id = id_parts[1] + + except: + comp_id = mol_name + enum_id = 0 + try: + # try to find an existing compound with the correct name + compound = _get_existing_compound(idx=comp_id) + except ValueError: + # the compound does not yet exist, create the object + new_compound = True + compound = Compound(name=comp_id, compound_number=comp_id) + try: + # check whether the enumeration exists + enumeration = 
+def get_ligands_as_compounds_with_conformers(
+    abs_path: str, poseviewer=None
+) -> List[Compound]:
+    comp_list = []
+    mol_supplier = Chem.SDMolSupplier(abs_path, removeHs=False)
+    for cur_id, mol in enumerate(mol_supplier):
+        enum = Enumeration(
+            smile=to_smiles(mol), original_smile=to_smiles(mol), molecule=mol
+        )
+        conf = Conformer(conformer=mol)
+        if poseviewer is not None:
+            conf.add_extra_data("structures_pv.maegz", data=poseviewer)
+        enum.add_conformer(conf)
+        comp = Compound(name=mol.GetProp("_Name"), compound_number=cur_id)
+        comp.add_enumeration(enum, auto_update=True)
+        comp_list.append(comp)
+    return comp_list
+
+
+def get_docked_ligands_as_conformers(abs_path: str, poseviewer=None) -> List[Compound]:
+    mol_supplier = Chem.SDMolSupplier(abs_path, removeHs=False)
+    comp = Compound(name="test_poses", compound_number=1)
+    enum = Enumeration()
+    for cur_id, mol in enumerate(mol_supplier):
+        conf = Conformer(conformer=mol, conformer_id=cur_id)
+
+        # attach the poseviewer file only to the first conformer
+        if cur_id == 0 and poseviewer is not None:
+            conf.add_extra_data(key="structures_pv.maegz", data=poseviewer)
+
+        enum.add_conformer(conf)
+
+    comp.add_enumeration(enum)
+    return [comp]
+
+
+def get_mol_as_Conformer(abs_path: str) -> List[Conformer]:
+    list_return = []
+    mol_supplier = Chem.SDMolSupplier(abs_path, removeHs=False)
+    for mol in mol_supplier:
+        list_return.append(Conformer(conformer=mol))
+    return list_return
+def get_test_Compounds_without_molecules(
+    compound_numbers: List[int] = [0],
+) -> Dict[str, Compound]:
+    """These compounds have neither a molecule in the enumeration nor any Conformer, i.e. no 3D structure."""
+    aspirin = Compound(name="Aspirin", compound_number=compound_numbers[0])
+    aspirin.add_enumeration(
+        Enumeration(
+            compound_object=aspirin,
+            smile="O=C(C)Oc1ccccc1C(=O)O",
+            original_smile="O=C(C)Oc1ccccc1C(=O)O",
+        )
+    )
+    return {"Aspirin": aspirin}
+
+
+def load_SDF_docked(abs_path: str) -> List[Compound]:
+    compounds = []
+    mol_supplier = Chem.SDMolSupplier(abs_path, removeHs=False)
+    for mol_id, mol in enumerate(mol_supplier):
+        comp = Compound(compound_number=mol_id)
+        enum = Enumeration(
+            smile=str(mol.GetProp("smiles")),
+            original_smile=str(mol.GetProp("original_smiles")),
+        )
+        conf = Conformer(conformer=mol)
+        enum.add_conformer(conf, auto_update=True)
+        comp.add_enumeration(enum, auto_update=True)
+        compounds.append(comp)
+    return compounds
+
+
+def directory_to_generic(path: str) -> List[GenericData]:
+    """Converts all files under the given path to GenericData objects and returns them as a list."""
+    generic_files = []
+    for root, _, files in os.walk(path):
+        for file_name in files:
+            full_path = os.path.join(root, file_name)
+            try:
+                with open(full_path, "r") as read_file:
+                    data = read_file.read()
+            except UnicodeDecodeError:
+                # non-text file: fall back to a binary read
+                with open(full_path, "rb") as read_file:
+                    data = read_file.read()
+            generic_files.append(GenericData(file_name=file_name, file_data=data))
+    return generic_files
diff --git a/unit_tests.py b/unit_tests.py
new file mode 100644
index 0000000..6022401
--- /dev/null
+++ b/unit_tests.py
@@ -0,0 +1,29 @@
+import unittest
+
+from tests.CREST import *
+from tests.OMEGA import *
+from tests.XTB import *
+from tests.Turbomole import *
+from tests.cosmo import *
+from tests.clustering import *
+from tests.rms_filter import *
+from tests.boltzmann_weighting import *
+from tests.composite_agents import *
+from tests.containers import *
+from tests.io import *
+from tests.feature_counter import *
+from tests.prediction import *
+from tests.step_utils import *
+from tests.schrodinger import *
+from tests.autodockvina import *
+from tests.panther import *
+from tests.shaep import *
+from tests.gromacs import *
+from tests.cavity_explorer import *
+from tests.structure_prediction import *
+from tests.rmsd import *
+from tests.flow_control import *
+from tests.pmx import *
+
+if __name__ == "__main__":
+    unittest.main()
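
unit_tests.py pulls every test module into one namespace via star imports so that a single `unittest.main()` call discovers all TestCase classes. Individual modules can also be run with the standard-library loader, without the aggregate; a sketch, assuming the repository root is on sys.path:

import unittest

# load just one module's tests instead of the star-import aggregate
suite = unittest.defaultTestLoader.loadTestsFromName(
    "tests.structure_prediction.test_peptide_embedder"
)
unittest.TextTestRunner(verbosity=2).run(suite)
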