Skip to content

Commit

Permalink
Loader: finished updating load to new resource generator structure
Browse files Browse the repository at this point in the history
  • Loading branch information
deeenes committed Nov 1, 2024
1 parent 0d94e16 commit 22edeb9
Showing 1 changed file with 20 additions and 10 deletions.
30 changes: 20 additions & 10 deletions omnipath_metabo/schema/_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,14 +124,19 @@ def load(self):
cached_resource = Tee(
self.resource,
struct = lambda x: x,
yld = lambda x: x['structure'],
)
psycopg2.extras.execute_values(cursor, query, cached_resource, page_size = 1000)


raw_con.commit()
_log("structures have been inserted, creating mol column")
_log("structures have been inserted")

return_ids = text("SELECT id, smiles FROM structures")
inserted_str = set(s[1] for s in cached_resource.cached['struct'])
inserted_str = {
s['structure'][1]
for s in cached_resource.cached['struct']
}
strids = {
smiles : id
for id, smiles in self.session.execute(return_ids)
Expand All @@ -146,8 +151,11 @@ def load(self):

insert_ids = (
(name, strids[smiles], resource_key, True, resource_key)
for name, smiles in cached_resource.cached['struct']
for name, smiles in (
r['structure'] for r in cached_resource.cached['struct']
)
)

_log('inserting identifiers.')
with raw_con.cursor() as cursor:
query = """
Expand All @@ -159,7 +167,6 @@ def load(self):
raw_con.commit()
_log('identifiers inserted.')

_log("structures have been inserted, creating mol column")
return_ids = text(
"SELECT id, structure_id, identifier, resource_id, id_type "
f"FROM identifiers WHERE resource_id = {resid}")
Expand All @@ -171,11 +178,11 @@ def load(self):
inserted_str = {
(strid_to_smile[s[1]], s[2], s[3], s[4])
for s in cached_resource.cached['struct'])

property_records = (
(
identifier_ids[
(record['structure'][0],
(record['structure'][0],
strids[record['structure'][1]],
resid,
True,
Expand All @@ -192,19 +199,22 @@ def load(self):
INSERT INTO properties (identifier_id, mw, monoiso_mass, charge, formula) VALUES %s
"""
psycopg2.extras.execute_values(cursor, query, inserted_str, page_size = 1000)


#self.indexer()


_log(f'Finished loading {self.resource.name}.', level = -1)

def update_mol_column(self):
    """
    Fill in the ``mol`` column of the ``structures`` table.

    Issues a single ``UPDATE`` that sets ``mol`` to
    ``mol_from_smiles(smiles::cstring)`` for every row whose ``mol`` is
    still null, then commits the session. Rows that already have a
    ``mol`` value are left untouched by the ``where mol is null`` filter.

    NOTE(review): ``mol_from_smiles`` is presumably provided by the RDKit
    PostgreSQL cartridge -- confirm the extension is installed in the
    target database.
    """

    # Use the same `_log` helper as the completion message below; the
    # original called a bare `log`, which is not used anywhere else here.
    _log("Creating mol column")

    query = text(
        "update structures set mol = mol_from_smiles(smiles::cstring) where mol is null"
    )
    self.session.execute(query)
    self.session.commit()

    _log('Finished creating mol column.')


def indexer(self):
"""
Creates a index of the mol structures using gist. Allows for substructure searches of the molecules.
Expand Down

0 comments on commit 22edeb9

Please sign in to comment.