Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature solvent forcefield inclusion #151

Merged
merged 9 commits into from
Jul 10, 2024
2 changes: 2 additions & 0 deletions environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ dependencies:
- openmm >= 7.6
- pdbfixer >= 1.5
- mdtraj
- openff-toolkit
- openmmforcefields
- pip
- pip:
- flake8
Expand Down
173 changes: 144 additions & 29 deletions mdagent/tools/base_tools/simulation_tools/setup_and_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@
from typing import Any, Dict, List, Optional, Type

import langchain
import requests
import streamlit as st
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.tools import BaseTool
from openff.toolkit.topology import Molecule
from openmm import (
AndersenThermostat,
BrownianIntegrator,
Expand Down Expand Up @@ -44,7 +46,9 @@
StateDataReporter,
)
from openmm.unit import bar, femtoseconds, kelvin, nanometers, picosecond, picoseconds
from openmmforcefields.generators import SMIRNOFFTemplateGenerator
from pydantic import BaseModel, Field
from rdkit import Chem

# Local Library/Application Imports
from mdagent.utils import FileType, PathRegistry
Expand Down Expand Up @@ -683,7 +687,19 @@ def setup_system(self):
self.pdb_path = self.path_registry.get_mapped_path(self.pdb_id)
self.pdb = PDBFile(self.pdb_path)
self.forcefield = ForceField(*self.params["forcefield_files"])
self.system = self._create_system(self.pdb, self.forcefield, **self.sys_params)
try:
self.system = self._create_system(
self.pdb, self.forcefield, **self.sys_params
)
print("System built successfully")
print(self.system)
except ValueError as e:
if "No template found for" in str(e):
raise ValueError(str(e))
else:
raise ValueError(
f"Error building system. Please check the forcefield files {str(e)}"
)

if self.sys_params.get("nonbondedMethod", None) in [
CutoffPeriodic,
Expand Down Expand Up @@ -873,40 +889,125 @@ def _create_system(

# if use_constraint_tolerance:
# constraintTolerance = system_params.pop('constraintTolerance')
print("About to create system...")
self.modeller = Modeller(pdb.topology, pdb.positions)
if solvate:
try:
self.modeller.addSolvent(forcefield)
except ValueError as e:
print("Error adding solvent", type(e).__name__, "–", e)
if "No Template for" in str(e):
raise ValueError(str(e))
except AttributeError as e:
print("Error adding solvent: ", type(e).__name__, "–", e)
print("Trying to add solvent with 1 nm padding")
if "NoneType" and "value_in_unit" in str(e):
try:
self.modeller.addSolvent(forcefield, padding=1 * nanometers)
except Exception as e:
print("Error adding solvent", type(e).__name__, "–", e)
raise (e)
except Exception as e:
if "Cannot neutralize the system because the" in str(e):
try:
self.modeller.addSolvent(forcefield, padding=1 * nanometers)
except Exception as e:
print("Error adding solvent", type(e).__name__, "–", e)
attempts = 0
solvent_list = ["MOH", "EOH", "HOH", "ACN", "URE", "DMS", "DMF", "GOL", "BNZ"]
while attempts < 3:
print(f"Attempts at creating system: {attempts}/3")
if solvate:
try:
self.modeller.addSolvent(forcefield)
except ValueError as e:
print("Error adding solvent", type(e).__name__, "–", e)
if "No template found for" in str(e):
smiles = self._error_to_smiles(e, solvent_list)

molecule = Molecule.from_smiles(smiles)
smirnoff = SMIRNOFFTemplateGenerator(molecules=molecule)
forcefield.registerTemplateGenerator(smirnoff.generator)
attempts += 1
print(
f"Attempt {attempts} to add small \
molecules to forcefield."
)
continue
else:
raise ValueError(str(e))

except AttributeError as e:
print("Error adding solvent: ", type(e).__name__, "–", e)
print("Trying to add solvent with 1 nm padding")
if "NoneType" and "value_in_unit" in str(e):
try:
self.modeller.addSolvent(forcefield, padding=1 * nanometers)
except Exception as e:
print("Error adding solvent", type(e).__name__, "–", e)
raise (e)
except Exception as e:
if "Cannot neutralize the system because the" in str(e):
try:
self.modeller.addSolvent(forcefield, padding=1 * nanometers)
except Exception as e:
print("Error adding solvent", type(e).__name__, "–", e)
raise (e)
else:
print("Exception: ", str(e))
raise (e)
else:
print("Exception: ", str(e))
raise (e)

system = forcefield.createSystem(self.modeller.topology, **system_params)
else:
system = forcefield.createSystem(self.modeller.topology, **system_params)
system = forcefield.createSystem(
self.modeller.topology, **system_params
)
break
else:
try:
print("adding system without solvent")
system = forcefield.createSystem(
self.modeller.topology, **system_params
)
break
except ValueError as e:
if "No template found for" in str(e):
print("Trying to add component to Forcefield...")
smiles = self._error_to_smiles(e, solvent_list)

molecule = Molecule.from_smiles(smiles)
smirnoff = SMIRNOFFTemplateGenerator(molecules=molecule)
forcefield.registerTemplateGenerator(smirnoff.generator)
attempts += 1
print(
f"Attempt {attempts} to add small \
molecules to forcefield."
)
continue
else:
raise ValueError(str(e))

if attempts == 3:
raise ValueError("Could not create system after 3 attemps.")
return system

def _code_to_smiles(
self, query: str
) -> (
str | None
): # from https://github.com/ur-whitelab/chemcrow-public/blob/main/chemcrow/tools/databases.py
url = " https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/{}"
r = requests.get(url.format(query, "property/IsomericSMILES/JSON"))
# convert the response to a json object
data = r.json()
# return the SMILES string
try:
smi = data["PropertyTable"]["Properties"][0]["IsomericSMILES"]
except KeyError:
return None
return smi

def _error_to_smiles(self, e, solvent_list):
pattern = r"residue \d+ \((\w+)\)"
# Search for the pattern in the error message
match = re.search(pattern, str(e))
if not match:
print("No residue code found in the error message.")
raise ValueError(str(e))
residue_code = match.group(1)
print(f"Residue code: {residue_code}")
if residue_code not in solvent_list:
print(
"Residue code not in solvent list. Adding forcefield \
not supported."
)
raise ValueError(str(e))

print("Trying to add missing component to Forcefield...")
smiles = self._code_to_smiles(residue_code)
if not smiles:
print("No SMILES found for HET code.")
raise ValueError(str(e))

print(f"Found SMILES from HET code: {smiles}")
return smiles

def unit_to_string(self, unit):
"""Needed to convert units to strings for the script
Otherwise internal __str()__ method makes the script
Expand Down Expand Up @@ -1084,6 +1185,20 @@ def _construct_script_content(
"""
return script_content

def het_to_smiles(het_code):
try:
# Fetch the molecule using RDKit's PDB parser
molecule = Chem.MolFromPDBCode(het_code)

if molecule:
# Convert the molecule to SMILES
smiles = Chem.MolToSmiles(molecule)
return smiles
else:
return "Invalid HET code or molecule not found."
except Exception as e:
return str(e)

def write_standalone_script(self, filename="reproduce_simulation.py"):
"""Extracting parameters from the class instance
Inspired by the code snippet provided from openmm-setup
Expand Down
Loading