-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_gathering.py
102 lines (88 loc) · 3.96 KB
/
data_gathering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import xml.etree.ElementTree as ET
import requests
def fetch_pubchem_data(drug_name: str) -> dict[str, str | float | None]:
"""
Fetches molecular data from PubChem for a given drug name.
Parameters:
drug_name (str): The name of the drug to fetch data for.
Returns:
dict: A dictionary containing molecular data from PubChem.
"""
url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{drug_name}/property/MolecularFormula,MolecularWeight,XLogP,HBondDonorCount,HBondAcceptorCount,ExactMass,TPSA/JSON"
try:
response = requests.get(url, verify=False)
response.raise_for_status() # Raise an error for bad status codes
data = response.json()
compound_info = data["PropertyTable"]["Properties"][0]
return {
"Drug": drug_name,
"Molecular Formula": compound_info.get("MolecularFormula"),
"Molecular Weight": compound_info.get("MolecularWeight"),
"XLogP": compound_info.get("XLogP"),
"H-Bond Donor Count": compound_info.get("HBondDonorCount"),
"H-Bond Acceptor Count": compound_info.get("HBondAcceptorCount"),
"Exact Mass": compound_info.get("ExactMass"),
"Topological Polar Surface Area (TPSA)": compound_info.get("TPSA"),
}
except requests.exceptions.HTTPError as http_err:
print(f"HTTP error occurred for {drug_name}: {http_err}")
except requests.exceptions.RequestException as req_err:
print(f"Request error occurred for {drug_name}: {req_err}")
except Exception as err:
print(f"An error occurred for {drug_name}: {err}")
return {}
def fetch_chembl_data(chembl_id: str) -> list[dict[str, str | None]]:
"""
Fetches bioactivity data from ChEMBL for a given ChEMBL ID.
Parameters:
chembl_id (str): The ChEMBL ID of the compound to fetch data for.
Returns:
list: A list of dictionaries containing bioactivity data from ChEMBL.
"""
url = (
f"https://www.ebi.ac.uk/chembl/api/data/activity?molecule_chembl_id={chembl_id}"
)
try:
response = requests.get(url)
response.raise_for_status() # Raise an error for bad status codes
data = response.content
# Parse the XML content
root = ET.fromstring(data)
activity_data = []
for activity in root.findall(".//activity"):
entry = {
"activity_id": activity.find("activity_id").text,
"assay_description": activity.find("assay_description").text,
"standard_type": activity.find("standard_type").text,
"standard_value": (
activity.find("standard_value").text
if activity.find("standard_value") is not None
else None
),
"standard_units": (
activity.find("standard_units").text
if activity.find("standard_units") is not None
else None
),
"target_organism": (
activity.find("target_organism").text
if activity.find("target_organism") is not None
else None
),
"target_pref_name": (
activity.find("target_pref_name").text
if activity.find("target_pref_name") is not None
else None
),
}
activity_data.append(entry)
return activity_data
except requests.exceptions.HTTPError as http_err:
print(f"HTTP error occurred for ChEMBL ID {chembl_id}: {http_err}")
except requests.exceptions.RequestException as req_err:
print(f"Request error occurred for ChEMBL ID {chembl_id}: {req_err}")
except ET.ParseError as parse_err:
print(f"Error parsing XML for ChEMBL ID {chembl_id}: {parse_err}")
except Exception as err:
print(f"An error occurred for ChEMBL ID {chembl_id}: {err}")
return []