Skip to content

Commit

Permalink
Updates compound data file and related functionality
Browse files: browse the repository at this point in the history
  • Loading branch information
IvanChernyshov committed Sep 24, 2024
1 parent e2bd688 commit 41e0ccd
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 15 deletions.
2 changes: 1 addition & 1 deletion nistchempy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@
from nistchempy.compound_list import get_all_data
from nistchempy.compound import get_compound
from nistchempy.search import run_search, NistSearchParameters
from nistchempy.search import print_search_parameters
from nistchempy.search import get_search_parameters, print_search_parameters


8 changes: 2 additions & 6 deletions nistchempy/compound_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,12 @@ def get_all_data() -> _pd.core.frame.DataFrame:
_pd.core.frame.DataFrame: dataframe containing pre-extracted compound info
'''
dt0 = {'mol_weight': 'float64'}
dt1 = {k: 'string' for k in ('ID', 'name', 'formula', 'inchi', 'inchi_key', 'cas_rn')}
dt2 = {k: 'bool' for k in ('mol2D', 'mol3D', 'cIR', 'cTZ', 'cMS', 'cUV', 'cGC',
'cTG', 'cTC', 'cTP', 'cSO', 'cTR', 'cIE', 'cIC', 'cES', 'cDI')}
dtypes = {**dt0, **dt1, **dt2}
pkg = _importlib_resources.files('nistchempy')
data_file = pkg / 'nist_data.zip'
with _importlib_resources.as_file(data_file) as path:
zf = _zipfile.ZipFile(path)
df = _pd.read_csv(zf.open('nist_data.csv'), dtype = dtypes)
df = _pd.read_csv(zf.open('nist_data.csv'), dtype = 'str')
df['mol_weight'] = df['mol_weight'].astype(float)
zf.close()

return df
Expand Down
Binary file modified nistchempy/nist_data.zip
Binary file not shown.
16 changes: 8 additions & 8 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,40 +6,40 @@
class TestSearch:

def test_search_id(self):
s = nist.search('C71432', 'id')
s = nist.run_search('C71432', 'id')
assert len(s.compounds) == 1
assert s.compounds[0].name.lower() == 'benzene'

def test_search_casrn(self):
s = nist.search('71-43-2', 'cas')
s = nist.run_search('71-43-2', 'cas')
assert len(s.compounds) == 1
assert s.compounds[0].name.lower() == 'benzene'

def test_search_name(self):
s = nist.search('*butadiene*', 'name')
s = nist.run_search('*butadiene*', 'name')
assert len(s.compound_ids) > 0
X = nist.get_compound(s.compound_ids[0])
names = [X.name] + X.synonyms
assert any(['butadiene' in name.lower() for name in names])

def test_search_formula(self):
s = nist.search('C6H?Cl3', 'formula')
s = nist.run_search('C6H?Cl3', 'formula')
assert s.compound_ids

def test_search_inchi(self):
s = nist.search('InChI=1S/C10H14O2/c1-6-3-4-8-7(2)5-12-10(11)9(6)8/h5-6,8-9H,3-4H2,1-2H3', 'inchi')
s = nist.run_search('InChI=1S/C10H14O2/c1-6-3-4-8-7(2)5-12-10(11)9(6)8/h5-6,8-9H,3-4H2,1-2H3', 'inchi')
assert s.compound_ids

def test_search_bad_inchi(self):
s = nist.search('qwe-qwe-qwe', 'inchi')
s = nist.run_search('qwe-qwe-qwe', 'inchi')
assert not s.compound_ids

def test_search_lost(self):
s = nist.search('C?H?O?', 'formula')
s = nist.run_search('C?H?O?', 'formula')
assert s.lost

def test_load_compounds(self):
s = nist.search('InChI=1S/C10H14O2/c1-6-3-4-8-7(2)5-12-10(11)9(6)8/h5-6,8-9H,3-4H2,1-2H3', 'inchi')
s = nist.run_search('InChI=1S/C10H14O2/c1-6-3-4-8-7(2)5-12-10(11)9(6)8/h5-6,8-9H,3-4H2,1-2H3', 'inchi')
s.load_found_compounds()
assert all([X.ID is not None for X in s.compounds])

Expand Down

0 comments on commit 41e0ccd

Please sign in to comment.