Skip to content

Commit

Permalink
Added log messages for loader.load()
Browse files Browse the repository at this point in the history
  • Loading branch information
for-hyde committed Oct 16, 2024
1 parent b57d28a commit 48b4483
Showing 1 changed file with 17 additions and 19 deletions.
36 changes: 17 additions & 19 deletions omnipath_metabo/schema/_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@
from sqlalchemy import text, select
from sqlalchemy.dialects.postgresql import insert
import psycopg2.extras
from omnipath_metabo import _log

from . import _structure
from ._base import Base
from ._connection import Connection

_log("Hello from main module")

def create(con):

Base.metadata.create_all(con.engine)
Expand Down Expand Up @@ -64,58 +67,51 @@ def __init__(self, resource, con):


def load(self):

insert_resource = insert(_structure.Resource).values(
name = self.resource.name
)
insert_resource = insert_resource.on_conflict_do_nothing(index_elements=['name'])
self.session.execute(insert_resource)
ids = collections.defaultdict(set)
_log(f'loading resource {self.resource.name}')

raw_con = self.con.engine.raw_connection()

with raw_con.cursor() as cursor:

query = """
INSERT INTO structures (name, smiles) VALUES %s ON CONFLICT (smiles) DO NOTHING
"""
_log("loading insert statments for structures table")
psycopg2.extras.execute_values(cursor, query, self.resource, page_size = 1000)
"""
for i, row in enumerate(self.resource):
insert_statement = insert(self.scheme).values(
smiles=row[1],
name=row[0],
)
ids[row[1]].add(row[0])
insert_statement = insert_statement.on_conflict_do_nothing(index_elements = ['smiles'])
self.session.execute(insert_statement)
if i > 1000:
break
"""

raw_con.commit()
_log("structures have been inserted, creating mol column")

self.update_mol_column()

_log('collecting structure ids')
select_str_ids = text('SELECT id, smiles FROM structures')
strids = {
id[1]: id[0]
for id in self.session.execute(select_str_ids)
}

_log('structure ids collected, collecting resource ids')
select_res_ids = text('SELECT id, name FROM resources')
resid= {
id[1]: id[0]
for id in self.session.execute(select_res_ids)
}
resource_key = resid[self.resource.name]
_log('resource ids collected.')

insert_ids = (
(id, strids[smiles], resource_key)
for smiles, _ids in ids.items()
for id in _ids
)

_log('inserting identifiers.')
with raw_con.cursor() as cursor:
query = """
INSERT INTO identifiers (identifier, structure_id, resource_id) VALUES %s
Expand All @@ -124,13 +120,15 @@ def load(self):


raw_con.commit()

_log('identifiers inserted.')
#self.indexer()
_log(f'{self.resource.name} loaded')

def update_mol_column(self):
query = text("update structures set mol = mol_from_smiles(smiles::cstring) where mol is null")
self.session.execute(query)
self.session.commit()
_log('Finished creating mol column.')

def indexer(self):
"""
Expand Down

0 comments on commit 48b4483

Please sign in to comment.