Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
SeonghwanSeo committed Aug 7, 2024
1 parent 3adba49 commit 59fcba4
Show file tree
Hide file tree
Showing 11 changed files with 314 additions and 331 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ weights
run.sh
result/
examples/library/
nogit/
test.sh


# Byte-compiled / optimized / DLL files
Expand Down
13 changes: 11 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -212,13 +212,22 @@ OUTPUT=(multi_scale_features, hotspot_info)
For feature extraction, it is recommended to use `score_threshold=0.5` instead of the default setting, which is tuned for pharmacophore modeling. If you want to extract more features, decrease the `score_threshold`.

```python
from pmnet.module import PharmacoNet
from pmnet.module import PharmacoNet, parse_protein
module = PharmacoNet(
"cuda",
score_threshold = 0.5 # <SCORE_THRESHOLD: float | dict[str, float], recommended=0.5>,
score_threshold = 0.5, # <SCORE_THRESHOLD: float | dict[str, float], recommended=0.5>,
molvoxel_library = 'numpy' # <MOLVOXEL_LIBRARY: str, if you use it in `Dataset`, set 'numpy'>
)
# End-to-End calculation
multi_scale_features, hotspot_infos = module.feature_extraction(<PROTEIN_PATH>, <REF_LIGAND_PATH>)
multi_scale_features, hotspot_infos = module.feature_extraction(<PROTEIN_PATH>, center=(<X>, <Y>, <Z>))

# Step-wise calculation
voxelizer = module.voxelizer
# In Dataset (Type: Tuple[Tensor, Tensor, Tensor, Tensor])
protein_data = module.parse_protein(voxelizer, <PROTEIN_PATH>, <REF_LIGAND_PATH>, <CENTER_NOISE>)
# In Model
multi_scale_features, hotspot_infos = module.run_extraction(protein_data)
```

### Paper List
Expand Down
7 changes: 0 additions & 7 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,3 @@ dependencies:
- openbabel=3.1.1
- pymol-open-source=3.0.0
- numpy=1.26.4
- pip:
- tqdm
- molvoxel==0.1.3
- numba==0.59.1
- omegaconf==2.3.0
- gdown==5.1.0
- biopython==1.83
13 changes: 7 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "pharmaconet"
version = "2.0.1"
version = "2.0.2"
description = "PharmacoNet: Open-Source Software for Protein-based Pharmacophore Modeling and Virtual Screening"
license = { text = "MIT" }
authors = [{ name = "Seonghwan Seo", email = "shwan0106@kaist.ac.kr" }]
Expand All @@ -24,13 +24,14 @@ classifiers = [
]

dependencies = [
"tqdm",
"torch>=1.13.0",
"numpy==1.26.4",
"numba==0.59.1",
"numpy>=1.26,<1.27",
"numba>=0.59",
"omegaconf>=2.3.0",
"molvoxel==0.1.3",
"gdown==5.1.0",
"biopython==1.83"
"molvoxel>=0.1.3",
"gdown>=5.1.0",
"biopython>=1.83"
]

[project.urls]
Expand Down
2 changes: 1 addition & 1 deletion src/pmnet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .pharmacophore_model import PharmacophoreModel

__version__ = "2.0.1"
__version__ = "2.0.2"
__citation_information__ = (
"Seo, S., & Kim, W. Y. (2023, December). "
"PharmacoNet: Accelerating Large-Scale Virtual Screening by Deep Pharmacophore Modeling. "
Expand Down
71 changes: 56 additions & 15 deletions src/pmnet/data/extract_pocket.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,63 @@
import os
import numpy as np
import math

from Bio.PDB import PDBParser, PDBIO
from Bio.PDB.PDBIO import Select

from typing import Union
from numpy.typing import ArrayLike
from pathlib import Path

import warnings

warnings.filterwarnings("ignore")

AMINO_ACID = [
'GLY', 'ALA', 'VAL', 'LEU', 'ILE', 'PRO', 'PHE', 'TYR', 'TRP', 'SER',
'THR', 'CYS', 'MET', 'ASN', 'GLN', 'ASP', 'GLU', 'LYS', 'ARG', 'HIS',
'HIP', 'HIE', 'TPO', 'HID', 'LEV', 'MEU', 'PTR', 'GLV', 'CYT', 'SEP',
'HIZ', 'CYM', 'GLM', 'ASQ', 'TYS', 'CYX', 'GLZ', 'MSE', 'CSO', 'KCX',
'CSD', 'MLY', 'PCA', 'LLP'
"GLY",
"ALA",
"VAL",
"LEU",
"ILE",
"PRO",
"PHE",
"TYR",
"TRP",
"SER",
"THR",
"CYS",
"MET",
"ASN",
"GLN",
"ASP",
"GLU",
"LYS",
"ARG",
"HIS",
"HIP",
"HIE",
"TPO",
"HID",
"LEV",
"MEU",
"PTR",
"GLV",
"CYT",
"SEP",
"HIZ",
"CYM",
"GLM",
"ASQ",
"TYS",
"CYX",
"GLZ",
"MSE",
"CSO",
"KCX",
"CSD",
"MLY",
"PCA",
"LLP",
]


Expand All @@ -28,11 +71,9 @@ def accept_residue(self, residue):
return 0
if residue.get_resname() not in AMINO_ACID:
return 0
residue_positions = np.array([
list(atom.get_vector())
for atom in residue.get_atoms()
if "H" not in atom.get_id()
])
residue_positions = np.array(
[list(atom.get_vector()) for atom in residue.get_atoms() if "H" not in atom.get_id()]
)
if residue_positions.shape[0] == 0:
return 0
min_dis = np.min(np.linalg.norm(residue_positions - self.center, axis=-1))
Expand All @@ -42,14 +83,14 @@ def accept_residue(self, residue):
return 0


DEFAULT_CUTOFF = 16 * math.sqrt(3) + 5.0


def extract_pocket(
protein_pdb_path: str,
out_pocket_pdb_path: str,
center: ArrayLike,
cutoff: float
protein_pdb_path: Union[str, Path], out_pocket_pdb_path: str, center: ArrayLike, cutoff: float = DEFAULT_CUTOFF
):
parser = PDBParser()
structure = parser.get_structure("protein", protein_pdb_path)
structure = parser.get_structure("protein", str(protein_pdb_path))
io = PDBIO()
io.set_structure(structure)
io.save(out_pocket_pdb_path, DistSelect(center, cutoff))
Expand Down
34 changes: 14 additions & 20 deletions src/pmnet/data/token_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@
from . import constant as C


def get_token_informations(
protein_obj: Protein,
) -> Tuple[NDArray[np.float32], NDArray[np.int16]]:
def get_token_informations(protein_obj: Protein) -> Tuple[NDArray[np.float32], NDArray[np.int16]]:
"""get token information
Args:
Expand All @@ -20,14 +18,15 @@ def get_token_informations(
token_positions: [float, (N, 3)] token center positions
token_classes: [int, (N,)] token interaction type
"""
num_tokens = \
len(protein_obj.hydrophobic_atoms_all) + \
len(protein_obj.rings_all) * 3 + \
len(protein_obj.hbond_donors_all) + \
len(protein_obj.hbond_acceptors_all) + \
len(protein_obj.pos_charged_atoms_all) * 2 + \
len(protein_obj.neg_charged_atoms_all) + \
len(protein_obj.xbond_acceptors_all)
num_tokens = (
len(protein_obj.hydrophobic_atoms_all)
+ len(protein_obj.rings_all) * 3
+ len(protein_obj.hbond_donors_all)
+ len(protein_obj.hbond_acceptors_all)
+ len(protein_obj.pos_charged_atoms_all) * 2
+ len(protein_obj.neg_charged_atoms_all)
+ len(protein_obj.xbond_acceptors_all)
)

positions: List[Tuple[float, float, float]] = []
classes: List[int] = []
Expand Down Expand Up @@ -83,8 +82,6 @@ def get_token_and_filter(
positions: NDArray[np.float32],
classes: NDArray[np.int16],
center: NDArray[np.float32],
resolution: float,
dimension: int,
) -> Tuple[NDArray[np.int16], NDArray[np.int16]]:
"""Create token and Filtering valid instances
Expand All @@ -99,6 +96,7 @@ def get_token_and_filter(
token: [int, (N_token, 4)]
filter: [int, (N_token,)]
"""
resolution, dimension = 0.5, 64
filter = []
tokens = []
x_center, y_center, z_center = center
Expand All @@ -116,12 +114,7 @@ def get_token_and_filter(
return np.array(tokens, dtype=np.int16), np.array(filter, dtype=np.int16)


def get_box_area(
tokens: ArrayLike,
pharmacophore_size: float,
resolution: float,
dimension: int,
) -> NDArray[np.bool_]:
def get_box_area(tokens: ArrayLike) -> NDArray[np.bool_]:
"""Create Box Area
Args:
Expand All @@ -132,9 +125,10 @@ def get_box_area(
Returns:
box_areas: BoolArray [Ntoken, D, H, W] D=H=W=dimension
"""
resolution, dimension, pharmacophore_size = 0.5, 64, 1.0
num_tokens = len(tokens)
box_areas = np.zeros((num_tokens, dimension, dimension, dimension), dtype=np.bool_)
grids = np.stack(np.meshgrid(np.arange(dimension), np.arange(dimension), np.arange(dimension), indexing='ij'), 3)
grids = np.stack(np.meshgrid(np.arange(dimension), np.arange(dimension), np.arange(dimension), indexing="ij"), 3)
for i, (x, y, z, t) in enumerate(tokens):
x, y, z, t = int(x), int(y), int(z), int(t)
distances = np.linalg.norm(grids - np.array([[x, y, z]]), axis=-1)
Expand Down
Loading

0 comments on commit 59fcba4

Please sign in to comment.