Skip to content

Commit

Permalink
Merge pull request #27 from MolecularAI/prep-for-release-1.8.0
Browse files Browse the repository at this point in the history
Prep for release 1.8.0
  • Loading branch information
SGenheden authored Dec 18, 2024
2 parents 2ad07a2 + c165f3c commit 65e9809
Show file tree
Hide file tree
Showing 67 changed files with 2,635 additions and 1,853 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ repos:
- id: check-added-large-files
- id: check-merge-conflict
- repo: https://github.com/psf/black
rev: 22.0.0
rev: 24.1.0
hooks:
- id: black
# We should add a linter here at some point
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ Then execute the following commands in the root of the repository

conda env create -f env-dev.yml
conda activate rxn-env
poetry install
poetry install --with dev

the `rxnutils` package is now installed in editable mode.

Expand Down
4 changes: 2 additions & 2 deletions env-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ channels:
- https://conda.anaconda.org/conda-forge
- defaults
dependencies:
- python>=3.9,<3.11
- poetry>=1.1.4,<2.0
- python>=3.9,<3.13
- poetry>=1.2.0,<2.0
2,152 changes: 1,198 additions & 954 deletions poetry.lock

Large diffs are not rendered by default.

39 changes: 23 additions & 16 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ packages = [
]

[tool.poetry.dependencies]
python = ">=3.9,<3.11"
urllib3 = "<2.0"
pandas = "^1.0.0"
python = ">=3.9,<3.13"
urllib3 = "^1.2.26"
pandas = ">=1.0.0,<3.0.0"
xxhash = "^2.0.0"
rdchiral = "^1.1.0"
PyYAML = "^6.0.1"
Expand All @@ -30,21 +30,25 @@ cgrtools = "^4.1.35"
scipy = "^1.11.4"
pydantic = "^2.8.2"
apted = "^1.0.3"
dask = ">=2024.4.1"
onnxruntime = {version = "<1.17.0", optional=true}

[tool.poetry.dev-dependencies]
pytest = "^6.2.2"
pytest-datadir = "^1.3.1"
pytest-mock = "^3.7.0"
pytest-mccabe = "^2.0"
pytest-black = "^0.3.12"
pytest-cov = "^3.0.0"
black = "^22.0.0"
mypy = "^0.800"
pre-commit = "^2.10.1"
[tool.poetry.group.dev.dependencies]
pytest = "^8.3.3"
pytest-datadir = "^1.5.0"
pytest-mock = "^3.14.0"
pytest-cov = "^6.0.0"
requests-mock = "^1.12.1"
black = "^24.10.0"
mypy = "^1.13.0"
pre-commit = "^4.0.1"
ipython = "^7.21.0"
pylint = "^2.14.1"
invoke = "^1.7.1"
Sphinx = "^7.3.7"
pylint = "^3.3.1"
invoke = "^2.2.0"
sphinx = "<8.1.0"

[tool.poetry.extras]
models = ["onnxruntime"]

[build-system]
requires = ["poetry-core>=1.0.0"]
Expand All @@ -57,3 +61,6 @@ max-attributes = 15
max-public-methods = 25
min-public-methods = 0
disable = "W1203, W0707, W1514, W0602, typecheck"

[tool.coverage.run]
relative_files = true
4 changes: 3 additions & 1 deletion rxnutils/chem/augmentation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
""" Routines for augmenting chemical reactions
"""

from rxnutils.chem.utils import split_rsmi

_SINGLE_REACTANT_REAGENTS = {"10.1.1": "Br", "10.1.2": "Cl"}


Expand All @@ -12,7 +14,7 @@ def single_reactant_augmentation(smiles: str, classification: str) -> str:
:param classification: the classification of the reaction or an empty string
:return: the processed SMILES
"""
reactants = smiles.split(">")[0]
reactants = split_rsmi(smiles)[0]
if "." in reactants:
return smiles
classification = classification.split(" ")[0]
Expand Down
29 changes: 8 additions & 21 deletions rxnutils/chem/cgr.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" Wrapper class for the CGRTools library
"""

import io
import warnings
from typing import List
Expand Down Expand Up @@ -28,20 +29,14 @@ def __init__(self, reaction: ChemicalReaction) -> None:
self._cgr_reactants = []
self._cgr_products = []
self._make_cgr_containers()
self.reaction_container = ReactionContainer(
reactants=self._cgr_reactants, products=self._cgr_products
)
self.reaction_container = ReactionContainer(reactants=self._cgr_reactants, products=self._cgr_products)
try:
self.cgr_container = self.reaction_container.compose()
except ValueError as err:
if str(err) == "mapping of graphs is not disjoint":
raise ValueError(
"Reaction contains inconsistent atom-mapping, perhaps duplicates"
)
raise ValueError("Reaction contains inconsistent atom-mapping, perhaps duplicates")
elif str(err).endswith("} not equal"):
raise ValueError(
"Atom with the same atom-mapping in reactant and product is not equal"
)
raise ValueError("Atom with the same atom-mapping in reactant and product is not equal")
else:
raise ValueError(f"Unknown problem with generating CGR: {err}")

Expand All @@ -58,10 +53,7 @@ def bonds_broken(self) -> int:
@property
def bonds_changed(self) -> int:
"""Returns the number of broken or formed bonds in the reaction"""
return sum(
bond.p_order is None or bond.order is None
for _, _, bond in self.cgr_container.bonds()
)
return sum(bond.p_order is None or bond.order is None for _, _, bond in self.cgr_container.bonds())

@property
def bonds_formed(self) -> int:
Expand All @@ -71,9 +63,7 @@ def bonds_formed(self) -> int:
@property
def total_centers(self) -> int:
"""Returns the number of atom and bond centers in the reaction"""
return len(self.cgr_container.center_atoms) + len(
self.cgr_container.center_bonds
)
return len(self.cgr_container.center_atoms) + len(self.cgr_container.center_bonds)

def distance_to(self, other: "CondensedGraphReaction") -> int:
"""
Expand Down Expand Up @@ -104,14 +94,11 @@ def _make_renumbered_mols(self):
# so this adds safe atom-mapping to un-mapped atoms
renumbered_mols = []
max_atom_map_numb = max(
max(atom_mapping_numbers(smi) or [0])
for smi in self.reaction.reactants_list + self.reaction.products_list
max(atom_mapping_numbers(smi) or [0]) for smi in self.reaction.reactants_list + self.reaction.products_list
)
for mol0 in self.reaction.reactants + self.reaction.products:
if mol0 is None:
raise ValueError(
"Cannot create CGR for this reaction, some molecules are None"
)
raise ValueError("Cannot create CGR for this reaction, some molecules are None")
mol = Chem.rdchem.Mol(mol0)
for atom in mol.GetAtoms():
if not atom.GetAtomMapNum():
Expand Down
17 changes: 7 additions & 10 deletions rxnutils/chem/disconnection_sites/atom_map_tagging.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import pandas as pd
from rdkit import Chem

from rxnutils.chem.utils import split_rsmi


def _get_atom_identifier(atom: Chem.rdchem.Atom) -> str:
"""
Expand All @@ -21,9 +23,7 @@ def _get_atom_identifier(atom: Chem.rdchem.Atom) -> str:
return str(atom_id)


def _get_bond_environment_identifier(
atoms: Sequence[Chem.rdchem.Atom], bond: Chem.rdchem.Bond
) -> str:
def _get_bond_environment_identifier(atoms: Sequence[Chem.rdchem.Atom], bond: Chem.rdchem.Bond) -> str:
"""
Get the environment of a specific bond.
Expand Down Expand Up @@ -79,17 +79,14 @@ def get_atom_list(reactants_smiles: str, product_smiles: str) -> List[int]:
ordered_reactant_neighbor_dict = _get_atomic_neighborhoods(reactants_smiles)
ordered_product_neighbor_dict = _get_atomic_neighborhoods(product_smiles)

all_indices = set(ordered_product_neighbor_dict.keys()) | set(
ordered_reactant_neighbor_dict.keys()
)
all_indices = set(ordered_product_neighbor_dict.keys()) | set(ordered_reactant_neighbor_dict.keys())

# Checks to see equivalence of atomic environments.
# If environment changed, then add atom to list
atom_list = [
atom_map
for atom_map in all_indices
if ordered_reactant_neighbor_dict.get(atom_map, [])
!= ordered_product_neighbor_dict.get(atom_map, [])
if ordered_reactant_neighbor_dict.get(atom_map, []) != ordered_product_neighbor_dict.get(atom_map, [])
]

return atom_list
Expand All @@ -104,7 +101,7 @@ def atom_map_tag_reactants(mapped_rxn: str) -> str:
:return: SMILES of the reactants containing tags corresponding to atoms changed in the
reaction.
"""
reactants_smiles, _, product_smiles = mapped_rxn.split(">")
reactants_smiles, _, product_smiles = split_rsmi(mapped_rxn)

reactants_mol = Chem.MolFromSmiles(reactants_smiles)
atom_list = get_atom_list(reactants_smiles, product_smiles)
Expand All @@ -128,7 +125,7 @@ def atom_map_tag_products(mapped_rxn: str) -> str:
:return: SMILES of the product containing tags corresponding to atoms changed in the
reaction.
"""
reactants_smiles, _, product_smiles = mapped_rxn.split(">")
reactants_smiles, _, product_smiles = split_rsmi(mapped_rxn)

product_mol = Chem.MolFromSmiles(product_smiles)
atom_list = get_atom_list(reactants_smiles, product_smiles)
Expand Down
51 changes: 13 additions & 38 deletions rxnutils/chem/disconnection_sites/tag_converting.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,21 @@ def smiles_tokens(smiles: str) -> List[str]:
:param smiles: SMILES to tokenize
:return: List of tokens identified in SMILES.
"""
pattern = r"(\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\(|\)|\.|=|#|-|\+|\\\\|\\|\/|:|~|@|\?|>|\*|\!|\$|\%[0-9]{2}|[0-9])"
pattern = (
r"(\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\(|\)|\.|=|#|-|\+|\\\\|\\|\/|:|~|@|\?|>|\*|\!|\$|\%[0-9]{2}|[0-9])"
)
regex = re.compile(pattern)
tokens = [token for token in regex.findall(smiles)]

tokenized_smiles = "".join(tokens)
if smiles != tokenized_smiles:
raise AssertionError(
f"tokenized SMILES not the same as input SMILES: {tokenized_smiles}, "
"{smiles}, tokens: {tokens}"
f"tokenized SMILES not the same as input SMILES: {tokenized_smiles}, " "{smiles}, tokens: {tokens}"
)
return tokens


def _next_tagged_token(
product_tagged_tokens: List[str], untagged_token: str, tagged_token_idx: int
) -> Tuple[str, int]:
def _next_tagged_token(product_tagged_tokens: List[str], untagged_token: str, tagged_token_idx: int) -> Tuple[str, int]:
"""
Get the next tagged token in the sequence. Includes checks and fixes for
stereochemistry changes due to removing atom mapping.
Expand All @@ -51,19 +50,13 @@ def _next_tagged_token(
tagged_token_idx += 1
return product_tagged_tokens[tagged_token_idx], tagged_token_idx

if (
tagged_token != untagged_token
and not ":1" in tagged_token
and "@" in tagged_token
):
if tagged_token != untagged_token and not ":1" in tagged_token and "@" in tagged_token:
return untagged_token, tagged_token_idx

return tagged_token, tagged_token_idx


def tagged_smiles_from_tokens(
product_tagged_tokens: List[str], product_untagged_tokens: List[str]
) -> Tuple[str, str]:
def tagged_smiles_from_tokens(product_tagged_tokens: List[str], product_untagged_tokens: List[str]) -> Tuple[str, str]:
"""
Convert the tagged SMILES from atom-mapping to unmapped-token + '!'
Expand All @@ -81,24 +74,16 @@ def tagged_smiles_from_tokens(

for untagged_token in product_untagged_tokens:

tagged_token, tagged_token_idx = _next_tagged_token(
product_tagged_tokens, untagged_token, tagged_token_idx
)
tagged_token, tagged_token_idx = _next_tagged_token(product_tagged_tokens, untagged_token, tagged_token_idx)

if tagged_token != untagged_token and (
untagged_token == "/" or untagged_token == "\\"
):
if tagged_token != untagged_token and (untagged_token == "/" or untagged_token == "\\"):
continue

if tagged_token == untagged_token:
product_converted += untagged_token
else:
# Remove brackets around a single letter
if (
len(untagged_token) == 3
and untagged_token.startswith("[")
and untagged_token.endswith("]")
):
if len(untagged_token) == 3 and untagged_token.startswith("[") and untagged_token.endswith("]"):
untagged_token = untagged_token[1]
product_converted += untagged_token + "!"

Expand All @@ -109,9 +94,7 @@ def tagged_smiles_from_tokens(
return product_converted, product_untagged


def _canonicalize_tagged_smiles(
product_tagged: str, product_untagged: str = None
) -> Tuple[str, str]:
def _canonicalize_tagged_smiles(product_tagged: str, product_untagged: str = None) -> Tuple[str, str]:
"""
Reorder the tagged-product SMILES on canonical form using the canonicalized
untagged product.
Expand All @@ -123,13 +106,7 @@ def _canonicalize_tagged_smiles(
mol = Chem.MolFromSmiles(product_tagged)
mol_untagged = Chem.MolFromSmiles(product_untagged)

_, canonical_atom_order = tuple(
zip(
*sorted(
[(j, i) for i, j in enumerate(Chem.CanonicalRankAtoms(mol_untagged))]
)
)
)
_, canonical_atom_order = tuple(zip(*sorted([(j, i) for i, j in enumerate(Chem.CanonicalRankAtoms(mol_untagged))])))

mol = Chem.RenumberAtoms(mol, canonical_atom_order)
mol_untagged = Chem.RenumberAtoms(mol_untagged, canonical_atom_order)
Expand Down Expand Up @@ -158,9 +135,7 @@ def convert_atom_map_tag(product_atom_map_tagged: str) -> str:
if not Chem.MolFromSmiles(product_untagged):
return ""

product_tagged, product_untagged = _canonicalize_tagged_smiles(
product_atom_map_tagged, product_untagged
)
product_tagged, product_untagged = _canonicalize_tagged_smiles(product_atom_map_tagged, product_untagged)

# Update the SMILES string to remove atom-mapping brackets and explicit [H]:s and
# replace by <atom>!
Expand Down
Empty file.
Loading

0 comments on commit 65e9809

Please sign in to comment.