Skip to content

Commit

Permalink
Merge pull request #8 from SeonghwanSeo/dev-2.0.2
Browse files Browse the repository at this point in the history
Dev 2.0.2
  • Loading branch information
SeonghwanSeo authored Aug 7, 2024
2 parents e61d36f + 59fcba4 commit c1bdee0
Show file tree
Hide file tree
Showing 13 changed files with 377 additions and 454 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ weights
run.sh
result/
examples/library/
nogit/
test.sh


# Byte-compiled / optimized / DLL files
Expand Down
13 changes: 11 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -212,13 +212,22 @@ OUTPUT=(multi_scale_features, hotspot_info)
For feature extraction, it is recommended to use `score_threshold=0.5` instead of the default setting used for pharmacophore modeling. To extract more features, decrease the `score_threshold`.

```python
from pmnet.module import PharmacoNet
from pmnet.module import PharmacoNet, parse_protein
module = PharmacoNet(
"cuda",
score_threshold = 0.5 # <SCORE_THRESHOLD: float | dict[str, float], recommended=0.5>,
score_threshold = 0.5, # <SCORE_THRESHOLD: float | dict[str, float], recommended=0.5>,
molvoxel_library = 'numpy' # <MOLVOXEL_LIBRARY: str, if you use it in `Dataset`, set 'numpy'>
)
# End-to-End calculation
multi_scale_features, hotspot_infos = module.feature_extraction(<PROTEIN_PATH>, <REF_LIGAND_PATH>)
multi_scale_features, hotspot_infos = module.feature_extraction(<PROTEIN_PATH>, center=(<X>, <Y>, <Z>))

# Step-wise calculation
voxelizer = module.voxelizer
# In Dataset (Type: Tuple[Tensor, Tensor, Tensor, Tensor])
protein_data = module.parse_protein(voxelizer, <PROTEIN_PATH>, <REF_LIGAND_PATH>, <CENTER_NOISE>)
# In Model
multi_scale_features, hotspot_infos = module.run_extraction(protein_data)
```

### Paper List
Expand Down
7 changes: 0 additions & 7 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,3 @@ dependencies:
- openbabel=3.1.1
- pymol-open-source=3.0.0
- numpy=1.26.4
- pip:
- tqdm
- molvoxel==0.1.3
- numba==0.59.1
- omegaconf==2.3.0
- gdown==5.1.0
- biopython==1.83
74 changes: 36 additions & 38 deletions feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,49 +24,47 @@ def __init__(self):
self.add_argument("--cuda", action="store_true", help="use gpu acceleration with CUDA")


"""
return tuple[multi_scale_features, hotspot_info]
multi_scale_features: list[torch.Tensor]:
- [96, 4, 4, 4], [96, 8, 8, 8], [96, 16, 16, 16], [96, 32, 32, 32], [96, 64, 64, 64]
hotspot_info
- hotspot_feature: torch.Tensor (192,)
- hotspot_position: tuple[float, float, float] - (x, y, z)
- hotspot_score: float in [0, 1]
def main(args):
"""
return tuple[multi_scale_features, hotspot_info]
multi_scale_features: list[torch.Tensor]:
- [96, 4, 4, 4], [96, 8, 8, 8], [96, 16, 16, 16], [96, 32, 32, 32], [96, 64, 64, 64]
hotspot_info
- hotspot_feature: torch.Tensor (192,)
- hotspot_position: tuple[float, float, float] - (x, y, z)
- hotspot_score: float in [0, 1]
- nci_type: str (10 types)
'Hydrophobic': Hydrophobic interaction
'PiStacking_P': PiStacking (Parallel)
'PiStacking_T': PiStacking (T-shaped)
'PiCation_lring': Interaction btw Protein Cation & Ligand Aromatic Ring
'PiCation_pring': Interaction btw Protein Aromatic Ring & Ligand Cation
'SaltBridge_pneg': SaltBridge btw Protein Anion & Ligand Cation
'SaltBridge_lneg': SaltBridge btw Protein Cation & Ligand Anion
'XBond': Halogen Bond
'HBond_pdon': Hydrogen Bond btw Protein Donor & Ligand Acceptor
'HBond_ldon': Hydrogen Bond btw Protein Acceptor & Ligand Donor
- nci_type: str (10 types)
'Hydrophobic': Hydrophobic interaction
'PiStacking_P': PiStacking (Parallel)
'PiStacking_T': PiStacking (T-shaped)
'PiCation_lring': Interaction btw Protein Cation & Ligand Aromatic Ring
'PiCation_pring': Interaction btw Protein Aromatic Ring & Ligand Cation
'SaltBridge_pneg': SaltBridge btw Protein Anion & Ligand Cation
'SaltBridge_lneg': SaltBridge btw Protein Cation & Ligand Anion
'XBond': Halogen Bond
'HBond_pdon': Hydrogen Bond btw Protein Donor & Ligand Acceptor
'HBond_ldon': Hydrogen Bond btw Protein Acceptor & Ligand Donor
- hotspot_type: str (7 types)
{'Hydrophobic', 'Aromatic', 'Cation', 'Anion',
'Halogen', 'HBond_donor', 'HBond_acceptor'}
*** `type` is obtained from `nci_type`.
- point_type: str (7 types)
{'Hydrophobic', 'Aromatic', 'Cation', 'Anion',
'Halogen', 'HBond_donor', 'HBond_acceptor'}
*** `type` is obtained from `nci_type`.
]
"""
- hotspot_type: str (7 types)
{'Hydrophobic', 'Aromatic', 'Cation', 'Anion',
'Halogen', 'HBond_donor', 'HBond_acceptor'}
*** `type` is obtained from `nci_type`.
- point_type: str (7 types)
{'Hydrophobic', 'Aromatic', 'Cation', 'Anion',
'Halogen', 'HBond_donor', 'HBond_acceptor'}
*** `type` is obtained from `nci_type`.
]
"""
device = "cuda" if args.cuda else "cpu"
score_threshold = 0.5 # NOTE: RECOMMENDED_SCORE_THRESHOLD


# NOTE: RECOMMENDED
RECOMMENDED_SCORE_THRESHOLD = 0.5
module = PharmacoNet(device, score_threshold)
multi_scale_features, hotspot_infos = module.feature_extraction(args.protein, args.ref_ligand, args.center)
torch.save([multi_scale_features, hotspot_infos], args.out)


if __name__ == "__main__":
parser = ArgParser()
args = parser.parse_args()
module = PharmacoNet(
device="cuda" if args.cuda else "cpu",
score_threshold=RECOMMENDED_SCORE_THRESHOLD,
)
multi_scale_features, hotspot_infos = module.feature_extraction(args.protein, args.ref_ligand, args.center)
torch.save([multi_scale_features, hotspot_infos], args.out)
main(args)
13 changes: 7 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "pharmaconet"
version = "2.0.1"
version = "2.0.2"
description = "PharmacoNet: Open-Source Software for Protein-based Pharmacophore Modeling and Virtual Screening"
license = { text = "MIT" }
authors = [{ name = "Seonghwan Seo", email = "shwan0106@kaist.ac.kr" }]
Expand All @@ -24,13 +24,14 @@ classifiers = [
]

dependencies = [
"tqdm",
"torch>=1.13.0",
"numpy==1.26.4",
"numba==0.59.1",
"numpy>=1.26,<1.27",
"numba>=0.59",
"omegaconf>=2.3.0",
"molvoxel==0.1.3",
"gdown==5.1.0",
"biopython==1.83"
"molvoxel>=0.1.3",
"gdown>=5.1.0",
"biopython>=1.83"
]

[project.urls]
Expand Down
2 changes: 1 addition & 1 deletion src/pmnet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .pharmacophore_model import PharmacophoreModel

__version__ = "2.0.1"
__version__ = "2.0.2"
__citation_information__ = (
"Seo, S., & Kim, W. Y. (2023, December). "
"PharmacoNet: Accelerating Large-Scale Virtual Screening by Deep Pharmacophore Modeling. "
Expand Down
71 changes: 56 additions & 15 deletions src/pmnet/data/extract_pocket.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,63 @@
import os
import numpy as np
import math

from Bio.PDB import PDBParser, PDBIO
from Bio.PDB.PDBIO import Select

from typing import Union
from numpy.typing import ArrayLike
from pathlib import Path

import warnings

warnings.filterwarnings("ignore")

AMINO_ACID = [
'GLY', 'ALA', 'VAL', 'LEU', 'ILE', 'PRO', 'PHE', 'TYR', 'TRP', 'SER',
'THR', 'CYS', 'MET', 'ASN', 'GLN', 'ASP', 'GLU', 'LYS', 'ARG', 'HIS',
'HIP', 'HIE', 'TPO', 'HID', 'LEV', 'MEU', 'PTR', 'GLV', 'CYT', 'SEP',
'HIZ', 'CYM', 'GLM', 'ASQ', 'TYS', 'CYX', 'GLZ', 'MSE', 'CSO', 'KCX',
'CSD', 'MLY', 'PCA', 'LLP'
"GLY",
"ALA",
"VAL",
"LEU",
"ILE",
"PRO",
"PHE",
"TYR",
"TRP",
"SER",
"THR",
"CYS",
"MET",
"ASN",
"GLN",
"ASP",
"GLU",
"LYS",
"ARG",
"HIS",
"HIP",
"HIE",
"TPO",
"HID",
"LEV",
"MEU",
"PTR",
"GLV",
"CYT",
"SEP",
"HIZ",
"CYM",
"GLM",
"ASQ",
"TYS",
"CYX",
"GLZ",
"MSE",
"CSO",
"KCX",
"CSD",
"MLY",
"PCA",
"LLP",
]


Expand All @@ -28,11 +71,9 @@ def accept_residue(self, residue):
return 0
if residue.get_resname() not in AMINO_ACID:
return 0
residue_positions = np.array([
list(atom.get_vector())
for atom in residue.get_atoms()
if "H" not in atom.get_id()
])
residue_positions = np.array(
[list(atom.get_vector()) for atom in residue.get_atoms() if "H" not in atom.get_id()]
)
if residue_positions.shape[0] == 0:
return 0
min_dis = np.min(np.linalg.norm(residue_positions - self.center, axis=-1))
Expand All @@ -42,14 +83,14 @@ def accept_residue(self, residue):
return 0


DEFAULT_CUTOFF = 16 * math.sqrt(3) + 5.0


def extract_pocket(
protein_pdb_path: str,
out_pocket_pdb_path: str,
center: ArrayLike,
cutoff: float
protein_pdb_path: Union[str, Path], out_pocket_pdb_path: str, center: ArrayLike, cutoff: float = DEFAULT_CUTOFF
):
parser = PDBParser()
structure = parser.get_structure("protein", protein_pdb_path)
structure = parser.get_structure("protein", str(protein_pdb_path))
io = PDBIO()
io.set_structure(structure)
io.save(out_pocket_pdb_path, DistSelect(center, cutoff))
Expand Down
34 changes: 14 additions & 20 deletions src/pmnet/data/token_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@
from . import constant as C


def get_token_informations(
protein_obj: Protein,
) -> Tuple[NDArray[np.float32], NDArray[np.int16]]:
def get_token_informations(protein_obj: Protein) -> Tuple[NDArray[np.float32], NDArray[np.int16]]:
"""get token information
Args:
Expand All @@ -20,14 +18,15 @@ def get_token_informations(
token_positions: [float, (N, 3)] token center positions
token_classes: [int, (N,)] token interaction type
"""
num_tokens = \
len(protein_obj.hydrophobic_atoms_all) + \
len(protein_obj.rings_all) * 3 + \
len(protein_obj.hbond_donors_all) + \
len(protein_obj.hbond_acceptors_all) + \
len(protein_obj.pos_charged_atoms_all) * 2 + \
len(protein_obj.neg_charged_atoms_all) + \
len(protein_obj.xbond_acceptors_all)
num_tokens = (
len(protein_obj.hydrophobic_atoms_all)
+ len(protein_obj.rings_all) * 3
+ len(protein_obj.hbond_donors_all)
+ len(protein_obj.hbond_acceptors_all)
+ len(protein_obj.pos_charged_atoms_all) * 2
+ len(protein_obj.neg_charged_atoms_all)
+ len(protein_obj.xbond_acceptors_all)
)

positions: List[Tuple[float, float, float]] = []
classes: List[int] = []
Expand Down Expand Up @@ -83,8 +82,6 @@ def get_token_and_filter(
positions: NDArray[np.float32],
classes: NDArray[np.int16],
center: NDArray[np.float32],
resolution: float,
dimension: int,
) -> Tuple[NDArray[np.int16], NDArray[np.int16]]:
"""Create tokens and filter valid instances
Expand All @@ -99,6 +96,7 @@ def get_token_and_filter(
token: [int, (N_token, 4)]
filter: [int, (N_token,)]
"""
resolution, dimension = 0.5, 64
filter = []
tokens = []
x_center, y_center, z_center = center
Expand All @@ -116,12 +114,7 @@ def get_token_and_filter(
return np.array(tokens, dtype=np.int16), np.array(filter, dtype=np.int16)


def get_box_area(
tokens: ArrayLike,
pharmacophore_size: float,
resolution: float,
dimension: int,
) -> NDArray[np.bool_]:
def get_box_area(tokens: ArrayLike) -> NDArray[np.bool_]:
"""Create Box Area
Args:
Expand All @@ -132,9 +125,10 @@ def get_box_area(
Returns:
box_areas: BoolArray [Ntoken, D, H, W] D=H=W=dimension
"""
resolution, dimension, pharmacophore_size = 0.5, 64, 1.0
num_tokens = len(tokens)
box_areas = np.zeros((num_tokens, dimension, dimension, dimension), dtype=np.bool_)
grids = np.stack(np.meshgrid(np.arange(dimension), np.arange(dimension), np.arange(dimension), indexing='ij'), 3)
grids = np.stack(np.meshgrid(np.arange(dimension), np.arange(dimension), np.arange(dimension), indexing="ij"), 3)
for i, (x, y, z, t) in enumerate(tokens):
x, y, z, t = int(x), int(y), int(z), int(t)
distances = np.linalg.norm(grids - np.array([[x, y, z]]), axis=-1)
Expand Down
Loading

0 comments on commit c1bdee0

Please sign in to comment.