Skip to content

Commit

Permalink
Updating data model
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomas-S-Allen committed Feb 27, 2024
1 parent 08050e8 commit 5fccca4
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 8 deletions.
11 changes: 6 additions & 5 deletions ClassifierPipeline/app.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

import json

# from builtins import str
# from .models import ClaimsLog, Records, AuthorInfo, ChangeLog
Expand All @@ -17,7 +17,6 @@
from sqlalchemy.orm import sessionmaker
# import cachetools
# import datetime
# import json
# import os
# import random
# import time
Expand Down Expand Up @@ -45,8 +44,8 @@ def clear_caches():
class SciXClassifierCelery(ADSCelery):


def __init__(self, *args, **kwargs):
pass
# def __init__(self, *args, **kwargs):
# pass

def index_record(self, record):
"""
Expand All @@ -62,12 +61,14 @@ def index_record(self, record):
# scores = Column(Text)
# created = Column(UTCDateTime, default=get_date)

# Scores Table

scores = {'scores': {cat:score for cat, score in zip(record['categories'], record['scores'])},
'earth_science_adjustment': record['earth_science_adjustment'],
'collections': record['collections']}

score_table = models.ScoreTable(bibcode=record['bibcode'],
scores=scores)
scores=json.dumps(scores))

import pdb; pdb.set_trace()

Expand Down
12 changes: 10 additions & 2 deletions ClassifierPipeline/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,22 @@
class ScoreTable(Base):
    """ORM mapping for the ``scores`` table.

    Each row holds the classifier output for one bibcode. ``scores`` is a
    Text column; ``SciXClassifierCelery.index_record`` fills it with a
    JSON string produced by ``json.dumps``.
    """
    __tablename__ = 'scores'
    id = Column(Integer, primary_key=True)
    # No unique constraint: a single definition is kept here (a second,
    # shadowed ``unique=True`` assignment was dead code) and it matches the
    # ``scores.bibcode`` column created by the initial Alembic migration.
    bibcode = Column(String(19))
    scores = Column(Text)
    # ``get_date`` is passed uncalled so it runs at insert time.
    created = Column(UTCDateTime, default=get_date)
    # NOTE(review): the initial migration names this column ``overrides_id``
    # (and also creates ``models_id``) - confirm which spelling is intended.
    override_id = Column(Integer, ForeignKey('overrides.id'))

class ModelTable(Base):
    """ORM mapping for the ``models`` table."""
    __tablename__ = 'models'
    id = Column(Integer, primary_key=True)
    # Free-text description of the model - presumably a serialized form of
    # the model info attached to each record in run.py; TODO confirm.
    model = Column(Text)
    # NOTE(review): the initial Alembic migration's ``models`` table has no
    # ``postprocessing`` column - confirm a later migration adds it.
    postprocessing = Column(Text)
    # ``get_date`` is passed uncalled so it runs at insert time.
    created = Column(UTCDateTime, default=get_date)

class OverrideTable(Base):
    """ORM mapping for the ``overrides`` table.

    The link between a score and its override lives on the scores side
    (``ScoreTable.override_id`` -> ``overrides.id``). This table therefore
    has no ``score_id`` column: the initial migration does not create one,
    and a back-reference here would form a foreign-key cycle between
    ``scores`` and ``overrides``.
    """
    __tablename__ = 'overrides'
    id = Column(Integer, primary_key=True)
    # Array of strings; ARRAY is a PostgreSQL-backed type in SQLAlchemy.
    override = Column(ARRAY(String))
    # ``get_date`` is passed uncalled so it runs at insert time.
    created = Column(UTCDateTime, default=get_date)

Expand Down
1 change: 1 addition & 0 deletions ClassifierPipeline/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
proj_home=proj_home,
local_config=globals().get("local_config", {}),
)
# import pdb; pdb.set_trace()
# from adsputils import setup_logging, load_config
# config = load_config(proj_home=proj_home)
# logger = setup_logging('run.py', proj_home=proj_home,
Expand Down
74 changes: 74 additions & 0 deletions alembic/versions/74a83030b18d_initialize_database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""initialize database
Revision ID: 74a83030b18d
Revises:
Create Date: 2024-02-23 11:24:57.637919
"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = '74a83030b18d'
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

def upgrade() -> None:
    """Create the initial schema: overrides, models, scores, final_collection.

    All tables are created first and the foreign keys are added afterwards,
    so no constraint ever points at a table that does not exist yet.

    Column types are taken from the ``sa`` namespace because this module
    imports only ``op`` and ``sqlalchemy as sa``.

    No ``default=`` is given for the ``created`` columns: ``Column.default``
    is a client-side default that is not rendered into CREATE TABLE, and the
    ORM models already supply ``default=get_date`` at insert time.

    NOTE(review): ``sa.TIMESTAMP`` stands in for the ORM's ``UTCDateTime``
    type, which is not importable here - confirm it matches that type's
    underlying database type.
    NOTE(review): ClassifierPipeline/models.py declares
    ``ScoreTable.override_id`` and ``ModelTable.postprocessing``; this
    migration creates ``scores.overrides_id`` / ``scores.models_id`` and no
    ``postprocessing`` column - reconcile before relying on the ORM.
    """
    # Overrides table
    op.create_table(
        'overrides',
        sa.Column('id', sa.Integer, primary_key=True),
        sa.Column('override', sa.ARRAY(sa.String)),
        sa.Column('created', sa.TIMESTAMP),
    )

    # Models table
    op.create_table(
        'models',
        sa.Column('id', sa.Integer, primary_key=True),
        sa.Column('model', sa.Text),
        sa.Column('created', sa.TIMESTAMP),
    )

    # Scores table
    op.create_table(
        'scores',
        sa.Column('id', sa.Integer, primary_key=True),
        sa.Column('bibcode', sa.String(19)),
        sa.Column('scores', sa.Text),
        sa.Column('overrides_id', sa.Integer),
        sa.Column('models_id', sa.Integer),
        sa.Column('created', sa.TIMESTAMP),
    )

    # Final Collection table
    op.create_table(
        'final_collection',
        sa.Column('id', sa.Integer, primary_key=True),
        sa.Column('score_id', sa.Integer),
        sa.Column('collection', sa.ARRAY(sa.String)),
        sa.Column('created', sa.TIMESTAMP),
    )

    # Foreign keys, added once every referenced table exists.
    op.create_foreign_key(
        'fk_overrides_id_scores',
        'scores',
        'overrides',
        ['overrides_id'],
        ['id'],
    )
    op.create_foreign_key(
        'fk_models_id_scores',
        'scores',
        'models',
        ['models_id'],
        ['id'],
    )
    op.create_foreign_key(
        'fk_score_id_final_collection',
        'final_collection',
        'scores',
        ['score_id'],
        ['id'],
    )

def downgrade() -> None:
    """Remove every object created by ``upgrade()``.

    Foreign keys are dropped first so the tables can then be dropped
    without dependency errors. Only constraints that ``upgrade()`` actually
    creates are referenced; there is no ``fk_score_id_overrides``.
    """
    op.drop_constraint(
        'fk_score_id_final_collection', 'final_collection', type_='foreignkey'
    )
    op.drop_constraint('fk_models_id_scores', 'scores', type_='foreignkey')
    op.drop_constraint('fk_overrides_id_scores', 'scores', type_='foreignkey')

    op.drop_table('final_collection')
    op.drop_table('scores')
    op.drop_table('overrides')
    op.drop_table('models')

5 changes: 4 additions & 1 deletion run.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ def score_record(record):
record['scores'] = record['scores'][0]

# Append model information to record
record['model'] = model_dict['model']
# record['model'] = model_dict['model']
record['model'] = model_dict


# print("Record: {}".format(record['bibcode']))
Expand Down Expand Up @@ -234,6 +235,8 @@ def index_record():

# =============================== MAIN ======================================= #

# To test the classifier
# python run.py -n -r ClassifierPipeline/tests/stub_data/stub_new_records.csv

if __name__ == '__main__':

Expand Down

0 comments on commit 5fccca4

Please sign in to comment.