Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prep for release 1.8.0 #27

Merged
merged 4 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
SGenheden marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ repos:
- id: check-added-large-files
- id: check-merge-conflict
- repo: https://github.com/psf/black
rev: 22.0.0
rev: 24.1.0
hooks:
- id: black
# We should add a linter here at some point
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ Then execute the following commands in the root of the repository

conda env create -f env-dev.yml
conda activate rxn-env
poetry install
poetry install --with dev

the `rxnutils` package is now installed in editable mode.

Expand Down
4 changes: 2 additions & 2 deletions env-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ channels:
- https://conda.anaconda.org/conda-forge
- defaults
dependencies:
- python>=3.9,<3.11
- poetry>=1.1.4,<2.0
- python>=3.9,<3.13
- poetry>=1.2.0,<2.0
2,152 changes: 1,198 additions & 954 deletions poetry.lock

Large diffs are not rendered by default.

39 changes: 23 additions & 16 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ packages = [
]

[tool.poetry.dependencies]
python = ">=3.9,<3.11"
urllib3 = "<2.0"
pandas = "^1.0.0"
python = ">=3.9,<3.13"
urllib3 = "^1.2.26"
pandas = ">=1.0.0,<3.0.0"
xxhash = "^2.0.0"
rdchiral = "^1.1.0"
PyYAML = "^6.0.1"
Expand All @@ -30,21 +30,25 @@ cgrtools = "^4.1.35"
scipy = "^1.11.4"
pydantic = "^2.8.2"
apted = "^1.0.3"
dask = ">=2024.4.1"
onnxruntime = {version = "<1.17.0", optional=true}

[tool.poetry.dev-dependencies]
pytest = "^6.2.2"
pytest-datadir = "^1.3.1"
pytest-mock = "^3.7.0"
pytest-mccabe = "^2.0"
pytest-black = "^0.3.12"
pytest-cov = "^3.0.0"
black = "^22.0.0"
mypy = "^0.800"
pre-commit = "^2.10.1"
[tool.poetry.group.dev.dependencies]
pytest = "^8.3.3"
pytest-datadir = "^1.5.0"
pytest-mock = "^3.14.0"
pytest-cov = "^6.0.0"
requests-mock = "^1.12.1"
black = "^24.10.0"
mypy = "^1.13.0"
pre-commit = "^4.0.1"
ipython = "^7.21.0"
pylint = "^2.14.1"
invoke = "^1.7.1"
Sphinx = "^7.3.7"
pylint = "^3.3.1"
invoke = "^2.2.0"
sphinx = "<8.1.0"

[tool.poetry.extras]
models = ["onnxruntime"]

[build-system]
requires = ["poetry-core>=1.0.0"]
Expand All @@ -57,3 +61,6 @@ max-attributes = 15
max-public-methods = 25
min-public-methods = 0
disable = "W1203, W0707, W1514, W0602, typecheck"

[tool.coverage.run]
relative_files = true
4 changes: 3 additions & 1 deletion rxnutils/chem/augmentation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
""" Routines for augmenting chemical reactions
"""

from rxnutils.chem.utils import split_rsmi

_SINGLE_REACTANT_REAGENTS = {"10.1.1": "Br", "10.1.2": "Cl"}


Expand All @@ -12,7 +14,7 @@ def single_reactant_augmentation(smiles: str, classification: str) -> str:
:param classification: the classification of the reaction or an empty string
:return: the processed SMILES
"""
reactants = smiles.split(">")[0]
reactants = split_rsmi(smiles)[0]
if "." in reactants:
return smiles
classification = classification.split(" ")[0]
Expand Down
29 changes: 8 additions & 21 deletions rxnutils/chem/cgr.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" Wrapper class for the CGRTools library
"""

import io
import warnings
from typing import List
Expand Down Expand Up @@ -28,20 +29,14 @@ def __init__(self, reaction: ChemicalReaction) -> None:
self._cgr_reactants = []
self._cgr_products = []
self._make_cgr_containers()
self.reaction_container = ReactionContainer(
reactants=self._cgr_reactants, products=self._cgr_products
)
self.reaction_container = ReactionContainer(reactants=self._cgr_reactants, products=self._cgr_products)
try:
self.cgr_container = self.reaction_container.compose()
except ValueError as err:
if str(err) == "mapping of graphs is not disjoint":
raise ValueError(
"Reaction contains inconsistent atom-mapping, perhaps duplicates"
)
raise ValueError("Reaction contains inconsistent atom-mapping, perhaps duplicates")
elif str(err).endswith("} not equal"):
raise ValueError(
"Atom with the same atom-mapping in reactant and product is not equal"
)
raise ValueError("Atom with the same atom-mapping in reactant and product is not equal")
else:
raise ValueError(f"Unknown problem with generating CGR: {err}")

Expand All @@ -58,10 +53,7 @@ def bonds_broken(self) -> int:
@property
def bonds_changed(self) -> int:
"""Returns the number of broken or formed bonds in the reaction"""
return sum(
bond.p_order is None or bond.order is None
for _, _, bond in self.cgr_container.bonds()
)
return sum(bond.p_order is None or bond.order is None for _, _, bond in self.cgr_container.bonds())

@property
def bonds_formed(self) -> int:
Expand All @@ -71,9 +63,7 @@ def bonds_formed(self) -> int:
@property
def total_centers(self) -> int:
"""Returns the number of atom and bond centers in the reaction"""
return len(self.cgr_container.center_atoms) + len(
self.cgr_container.center_bonds
)
return len(self.cgr_container.center_atoms) + len(self.cgr_container.center_bonds)

def distance_to(self, other: "CondensedGraphReaction") -> int:
"""
Expand Down Expand Up @@ -104,14 +94,11 @@ def _make_renumbered_mols(self):
# so this adds safe atom-mapping to un-mapped atoms
renumbered_mols = []
max_atom_map_numb = max(
max(atom_mapping_numbers(smi) or [0])
for smi in self.reaction.reactants_list + self.reaction.products_list
max(atom_mapping_numbers(smi) or [0]) for smi in self.reaction.reactants_list + self.reaction.products_list
)
for mol0 in self.reaction.reactants + self.reaction.products:
if mol0 is None:
raise ValueError(
"Cannot create CGR for this reaction, some molecules are None"
)
raise ValueError("Cannot create CGR for this reaction, some molecules are None")
mol = Chem.rdchem.Mol(mol0)
for atom in mol.GetAtoms():
if not atom.GetAtomMapNum():
Expand Down
17 changes: 7 additions & 10 deletions rxnutils/chem/disconnection_sites/atom_map_tagging.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import pandas as pd
from rdkit import Chem

from rxnutils.chem.utils import split_rsmi


def _get_atom_identifier(atom: Chem.rdchem.Atom) -> str:
"""
Expand All @@ -21,9 +23,7 @@ def _get_atom_identifier(atom: Chem.rdchem.Atom) -> str:
return str(atom_id)


def _get_bond_environment_identifier(
atoms: Sequence[Chem.rdchem.Atom], bond: Chem.rdchem.Bond
) -> str:
def _get_bond_environment_identifier(atoms: Sequence[Chem.rdchem.Atom], bond: Chem.rdchem.Bond) -> str:
"""
Get the environment of a specific bond.

Expand Down Expand Up @@ -79,17 +79,14 @@ def get_atom_list(reactants_smiles: str, product_smiles: str) -> List[int]:
ordered_reactant_neighbor_dict = _get_atomic_neighborhoods(reactants_smiles)
ordered_product_neighbor_dict = _get_atomic_neighborhoods(product_smiles)

all_indices = set(ordered_product_neighbor_dict.keys()) | set(
ordered_reactant_neighbor_dict.keys()
)
all_indices = set(ordered_product_neighbor_dict.keys()) | set(ordered_reactant_neighbor_dict.keys())

# Checks to see equivalence of atomic environments.
# If environment changed, then add atom to list
atom_list = [
atom_map
for atom_map in all_indices
if ordered_reactant_neighbor_dict.get(atom_map, [])
!= ordered_product_neighbor_dict.get(atom_map, [])
if ordered_reactant_neighbor_dict.get(atom_map, []) != ordered_product_neighbor_dict.get(atom_map, [])
]

return atom_list
Expand All @@ -104,7 +101,7 @@ def atom_map_tag_reactants(mapped_rxn: str) -> str:
:return: SMILES of the reactants containing tags corresponding to atoms changed in the
reaction.
"""
reactants_smiles, _, product_smiles = mapped_rxn.split(">")
reactants_smiles, _, product_smiles = split_rsmi(mapped_rxn)

reactants_mol = Chem.MolFromSmiles(reactants_smiles)
atom_list = get_atom_list(reactants_smiles, product_smiles)
Expand All @@ -128,7 +125,7 @@ def atom_map_tag_products(mapped_rxn: str) -> str:
:return: SMILES of the product containing tags corresponding to atoms changed in the
reaction.
"""
reactants_smiles, _, product_smiles = mapped_rxn.split(">")
reactants_smiles, _, product_smiles = split_rsmi(mapped_rxn)

product_mol = Chem.MolFromSmiles(product_smiles)
atom_list = get_atom_list(reactants_smiles, product_smiles)
Expand Down
51 changes: 13 additions & 38 deletions rxnutils/chem/disconnection_sites/tag_converting.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,21 @@ def smiles_tokens(smiles: str) -> List[str]:
:param smiles: SMILES to tokenize
:return: List of tokens identified in SMILES.
"""
pattern = r"(\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\(|\)|\.|=|#|-|\+|\\\\|\\|\/|:|~|@|\?|>|\*|\!|\$|\%[0-9]{2}|[0-9])"
pattern = (
r"(\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\(|\)|\.|=|#|-|\+|\\\\|\\|\/|:|~|@|\?|>|\*|\!|\$|\%[0-9]{2}|[0-9])"
)
regex = re.compile(pattern)
tokens = [token for token in regex.findall(smiles)]

tokenized_smiles = "".join(tokens)
if smiles != tokenized_smiles:
raise AssertionError(
f"tokenized SMILES not the same as input SMILES: {tokenized_smiles}, "
"{smiles}, tokens: {tokens}"
f"tokenized SMILES not the same as input SMILES: {tokenized_smiles}, " "{smiles}, tokens: {tokens}"
)
return tokens


def _next_tagged_token(
product_tagged_tokens: List[str], untagged_token: str, tagged_token_idx: int
) -> Tuple[str, int]:
def _next_tagged_token(product_tagged_tokens: List[str], untagged_token: str, tagged_token_idx: int) -> Tuple[str, int]:
"""
Get the next tagged token in the sequence. Includes checks and fixes for
stereochemistry changes due to removing atom mapping.
Expand All @@ -51,19 +50,13 @@ def _next_tagged_token(
tagged_token_idx += 1
return product_tagged_tokens[tagged_token_idx], tagged_token_idx

if (
tagged_token != untagged_token
and not ":1" in tagged_token
and "@" in tagged_token
):
if tagged_token != untagged_token and not ":1" in tagged_token and "@" in tagged_token:
return untagged_token, tagged_token_idx

return tagged_token, tagged_token_idx


def tagged_smiles_from_tokens(
product_tagged_tokens: List[str], product_untagged_tokens: List[str]
) -> Tuple[str, str]:
def tagged_smiles_from_tokens(product_tagged_tokens: List[str], product_untagged_tokens: List[str]) -> Tuple[str, str]:
"""
Convert the tagged SMILES from atom-mapping to unmapped-token + '!'

Expand All @@ -81,24 +74,16 @@ def tagged_smiles_from_tokens(

for untagged_token in product_untagged_tokens:

tagged_token, tagged_token_idx = _next_tagged_token(
product_tagged_tokens, untagged_token, tagged_token_idx
)
tagged_token, tagged_token_idx = _next_tagged_token(product_tagged_tokens, untagged_token, tagged_token_idx)

if tagged_token != untagged_token and (
untagged_token == "/" or untagged_token == "\\"
):
if tagged_token != untagged_token and (untagged_token == "/" or untagged_token == "\\"):
continue

if tagged_token == untagged_token:
product_converted += untagged_token
else:
# Remove brackets around a single letter
if (
len(untagged_token) == 3
and untagged_token.startswith("[")
and untagged_token.endswith("]")
):
if len(untagged_token) == 3 and untagged_token.startswith("[") and untagged_token.endswith("]"):
untagged_token = untagged_token[1]
product_converted += untagged_token + "!"

Expand All @@ -109,9 +94,7 @@ def tagged_smiles_from_tokens(
return product_converted, product_untagged


def _canonicalize_tagged_smiles(
product_tagged: str, product_untagged: str = None
) -> Tuple[str, str]:
def _canonicalize_tagged_smiles(product_tagged: str, product_untagged: str = None) -> Tuple[str, str]:
"""
Reorder the tagged-product SMILES on canonical form using the canonicalized
untagged product.
Expand All @@ -123,13 +106,7 @@ def _canonicalize_tagged_smiles(
mol = Chem.MolFromSmiles(product_tagged)
mol_untagged = Chem.MolFromSmiles(product_untagged)

_, canonical_atom_order = tuple(
zip(
*sorted(
[(j, i) for i, j in enumerate(Chem.CanonicalRankAtoms(mol_untagged))]
)
)
)
_, canonical_atom_order = tuple(zip(*sorted([(j, i) for i, j in enumerate(Chem.CanonicalRankAtoms(mol_untagged))])))

mol = Chem.RenumberAtoms(mol, canonical_atom_order)
mol_untagged = Chem.RenumberAtoms(mol_untagged, canonical_atom_order)
Expand Down Expand Up @@ -158,9 +135,7 @@ def convert_atom_map_tag(product_atom_map_tagged: str) -> str:
if not Chem.MolFromSmiles(product_untagged):
return ""

product_tagged, product_untagged = _canonicalize_tagged_smiles(
product_atom_map_tagged, product_untagged
)
product_tagged, product_untagged = _canonicalize_tagged_smiles(product_atom_map_tagged, product_untagged)

# Update the SMILES string to remove atom-mapping brackets and explicit [H]:s and
# replace by <atom>!
Expand Down
Empty file.
Loading
Loading