Skip to content

Commit

Permalink
Adding a new attribute for experiment: nucleic_acid_type
Browse files Browse the repository at this point in the history
  • Loading branch information
paulstretenowich committed Nov 20, 2023
1 parent bc43e8a commit d935efd
Show file tree
Hide file tree
Showing 6 changed files with 182 additions and 24 deletions.
169 changes: 149 additions & 20 deletions project_tracking/db_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,22 @@ def to_dict(self):
return rv

class DidNotFindError(Error):
    """Error raised when a database lookup returns no matching row.

    Args:
        message: Explicit message; when given (and truthy) it is used verbatim.
        table: Name of the table/entity that was queried; used only to build
            the fallback message.
        attribute: Name of the attribute used in the lookup; used only to
            build the fallback message.
        query: Value that was searched for; used only to build the fallback
            message.
    """

    def __init__(self, message=None, table=None, attribute=None, query=None):
        # The caller-supplied message (possibly None) is forwarded unchanged
        # to the base class.
        super().__init__(message)
        if message:
            self.message = message
        else:
            # Quote each part so empty or whitespace-only values stay visible.
            self.message = f"'{table}' with '{attribute}' '{query}' doesn't exist on database"

class RequestError(Error):
    """Error raised when a request lacks a required argument.

    Args:
        message: Explicit message; when given (and truthy) it is used verbatim.
        argument: Name of the missing argument; used only to build the
            fallback message.
    """

    def __init__(self, message=None, argument=None):
        # The caller-supplied message (possibly None) is forwarded unchanged
        # to the base class.
        super().__init__(message)
        if message:
            self.message = message
        else:
            self.message = f"For current request '{argument}' is required"

def name_to_id(model_class, name, session=None):
"""
Expand Down Expand Up @@ -590,6 +599,7 @@ def ingest_run_processing(project_id: str, ingest_data, session=None):
experiment = Experiment.from_attributes(
sequencing_technology=readset_json[vb.EXPERIMENT_SEQUENCING_TECHNOLOGY],
type=readset_json[vb.EXPERIMENT_TYPE],
nucleic_acid_type=readset_json[vb.EXPERIMENT_NUCLEIC_ACID_TYPE],
library_kit=readset_json[vb.EXPERIMENT_LIBRARY_KIT],
kit_expiration_date=kit_expiration_date,
session=session
Expand Down Expand Up @@ -752,6 +762,7 @@ def digest_readset_file(project_id: str, digest_data, session=None):
if not session:
session = database.get_session()

patients = []
samples = []
readsets = []
output = []
Expand All @@ -760,21 +771,73 @@ def digest_readset_file(project_id: str, digest_data, session=None):
}

location_endpoint = None

if vb.LOCATION_ENDPOINT in digest_data.keys():
location_endpoint = digest_data[vb.LOCATION_ENDPOINT]

if vb.EXPERIMENT_NUCLEIC_ACID_TYPE in digest_data.keys():
nucleic_acid_type = digest_data[vb.EXPERIMENT_NUCLEIC_ACID_TYPE]
else:
raise RequestError(argument="experiment_nucleic_acid_type")

if vb.PATIENT_NAME in digest_data.keys():
for patient_name in digest_data[vb.PATIENT_NAME]:
patient = session.scalars(
select(Patient)
.where(Patient.name == patient_name)
.join(Patient.samples)
.join(Sample.readsets)
.join(Readset.experiment)
.where(Experiment.nucleic_acid_type == nucleic_acid_type)
).unique().first()
if patient:
patients.append(patient)
else:
raise DidNotFindError(table="Patient", attribute="name", query=patient_name)
if vb.PATIENT_ID in digest_data.keys():
for patient_id in digest_data[vb.PATIENT_ID]:
# logger.debug(f"\n\n{patient_id}\n\n")
patient = session.scalars(
select(Patient)
.where(Patient.id == patient_id)
.join(Patient.samples)
.join(Sample.readsets)
.join(Readset.experiment)
.where(Experiment.nucleic_acid_type == nucleic_acid_type)
).unique().first()
if patient:
patients.append(patient)
else:
raise DidNotFindError(table="Patient", attribute="id", query=patient_id)
if patients:
set(patients)
for patient in patients:
for sample in patient.samples:
for readset in sample.readsets:
readsets.append(readset)

if vb.SAMPLE_NAME in digest_data.keys():
for sample_name in digest_data[vb.SAMPLE_NAME]:
sample = session.scalars(select(Sample).where(Sample.name == sample_name)).unique().first()
sample = session.scalars(
select(Sample)
.where(Sample.name == sample_name)
.join(Sample.readsets)
.join(Readset.experiment)
.where(Experiment.nucleic_acid_type == nucleic_acid_type)
).unique().first()
if sample:
samples.append(sample)
else:
raise DidNotFindError(table="Sample", attribute="name", query=sample_name)
if vb.SAMPLE_ID in digest_data.keys():
for sample_id in digest_data[vb.SAMPLE_ID]:
# logger.debug(f"\n\n{sample_id}\n\n")
sample = session.scalars(select(Sample).where(Sample.id == sample_id)).unique().first()
sample = session.scalars(
select(Sample)
.where(Sample.id == sample_id)
.join(Sample.readsets)
.join(Readset.experiment)
.where(Experiment.nucleic_acid_type == nucleic_acid_type)
).unique().first()
if sample:
samples.append(sample)
else:
Expand All @@ -784,16 +847,27 @@ def digest_readset_file(project_id: str, digest_data, session=None):
for sample in samples:
for readset in sample.readsets:
readsets.append(readset)

if vb.READSET_NAME in digest_data.keys():
for readset_name in digest_data[vb.READSET_NAME]:
readset = session.scalars(select(Readset).where(Readset.name == readset_name)).unique().first()
readset = session.scalars(
select(Readset)
.where(Readset.name == readset_name)
.join(Readset.experiment)
.where(Experiment.nucleic_acid_type == nucleic_acid_type)
).unique().first()
if readset:
readsets.append(readset)
else:
raise DidNotFindError(table="Readset", attribute="name", query=readset_name)
if vb.READSET_ID in digest_data.keys():
for readset_id in digest_data[vb.READSET_ID]:
readset = session.scalars(select(Readset).where(Readset.id == readset_id)).unique().first()
readset = session.scalars(
select(Readset)
.where(Readset.id == readset_id)
.join(Readset.experiment)
.where(Experiment.nucleic_acid_type == nucleic_acid_type)
).unique().first()
if readset:
readsets.append(readset)
else:
Expand All @@ -802,7 +876,6 @@ def digest_readset_file(project_id: str, digest_data, session=None):
set(readsets)
for readset in readsets:
readset_files = []
logger.debug(f"\n\n{readset}\n\n")
bed = None
fastq1 = None
fastq2 = None
Expand Down Expand Up @@ -869,35 +942,93 @@ def digest_pair_file(project_id: str, digest_data, session=None):
# readsets = []
output = []

if vb.EXPERIMENT_NUCLEIC_ACID_TYPE in digest_data.keys():
nucleic_acid_type = digest_data[vb.EXPERIMENT_NUCLEIC_ACID_TYPE]
else:
raise RequestError(argument="experiment_nucleic_acid_type")

if vb.PATIENT_NAME in digest_data.keys():
for patient_name in digest_data[vb.PATIENT_NAME]:
patient = session.scalars(
select(Patient)
.where(Patient.name == patient_name)
.join(Patient.samples)
.join(Sample.readsets)
.join(Readset.experiment)
.where(Experiment.nucleic_acid_type == nucleic_acid_type)
).unique().first()
if patient:
patients.append(patient)
else:
raise DidNotFindError(table="Patient", attribute="name", query=patient_name)
if vb.PATIENT_ID in digest_data.keys():
for patient_id in digest_data[vb.PATIENT_ID]:
patient = session.scalars(
select(Patient)
.where(Patient.id == patient_id)
.join(Patient.samples)
.join(Sample.readsets)
.join(Readset.experiment)
.where(Experiment.nucleic_acid_type == nucleic_acid_type)
).unique().first()
if patient:
patients.append(patient)
else:
raise DidNotFindError(table="Patient", attribute="id", query=patient_id)
if patients:
set(patients)
for patient in patients:
for sample in patient.samples:
samples.append(sample)

if vb.SAMPLE_NAME in digest_data.keys():
for sample_name in digest_data[vb.SAMPLE_NAME]:
sample = session.scalars(select(Sample).where(Sample.name == sample_name)).unique().first()
# logger.info(f"\n\n{sample}\n\n")
sample = session.scalars(
select(Sample)
.where(Sample.name == sample_name)
.join(Sample.readsets)
.join(Readset.experiment)
.where(Experiment.nucleic_acid_type == nucleic_acid_type)
).unique().first()
if sample:
samples.append(sample)
else:
raise DidNotFindError(table="Sample", attribute="name", query=sample_name)
if vb.SAMPLE_ID in digest_data.keys():
for sample_id in digest_data[vb.SAMPLE_ID]:
sample = session.scalars(select(Sample).where(Sample.id == sample_id)).unique().first()
sample = session.scalars(
select(Sample)
.where(Sample.id == sample_id)
.join(Sample.readsets)
.join(Readset.experiment)
.where(Experiment.nucleic_acid_type == nucleic_acid_type)
).unique().first()
if sample:
samples.append(sample)
else:
raise DidNotFindError(table="Sample", attribute="id", query=sample_id)
if vb.READSET_NAME in digest_data.keys():
for readset_name in digest_data[vb.READSET_NAME]:
readset = session.scalars(select(Readset).where(Readset.name == readset_name)).unique().first()
readset = session.scalars(
select(Readset)
.where(Readset.name == readset_name)
.join(Readset.experiment)
.where(Experiment.nucleic_acid_type == nucleic_acid_type)
).unique().first()
if readset:
samples.append(readset.sample)
# readsets.append(readset)
else:
raise DidNotFindError(table="Readset", attribute="name", query=readset_name)
if vb.READSET_ID in digest_data.keys():
for readset_id in digest_data[vb.READSET_ID]:
readset = session.scalars(select(Readset).where(Readset.id == readset_id)).unique().first()
readset = session.scalars(
select(Readset)
.where(Readset.id == readset_id)
.join(Readset.experiment)
.where(Experiment.nucleic_acid_type == nucleic_acid_type)
).unique().first()
if readset:
samples.append(readset.sample)
# readsets.append(readset)
else:
raise DidNotFindError(table="Readset", attribute="id", query=readset_id)
if samples:
Expand Down Expand Up @@ -1066,7 +1197,7 @@ def digest_unanalyzed(project_id: str, digest_data, session=None):
run_name = digest_data["run_name"]
if run_name:
run_id = name_to_id("Run", run_name)[0]
experiment_sequencing_technology = digest_data["experiment_sequencing_technology"]
experiment_nucleic_acid_type = digest_data["experiment_nucleic_acid_type"]
location_endpoint = digest_data["location_endpoint"]

if sample_name_flag:
Expand Down Expand Up @@ -1096,17 +1227,15 @@ def digest_unanalyzed(project_id: str, digest_data, session=None):
stmt.where(Run.id == run_id)
.join(Readset.run)
)
if experiment_sequencing_technology:
if experiment_nucleic_acid_type:
stmt = (
stmt.where(Experiment.sequencing_technology == experiment_sequencing_technology)
stmt.where(Experiment.nucleic_acid_type == experiment_nucleic_acid_type)
.join(Readset.experiment)
)

# logger.debug(f"\n\n{stmt}\n\n")
output = {
"location_endpoint": location_endpoint,
key: session.scalars(stmt).unique().all()
}
# logger.debug(f"\n\n{session.scalars(stmt).unique().all()}\n\n")

return json.dumps(output)
25 changes: 24 additions & 1 deletion project_tracking/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@

from . import database

class NucleicAcidTypeEnum(enum.Enum):
    """Nucleic acid type of an experiment; each member's value equals its name."""
    DNA = "DNA"
    RNA = "RNA"


class LaneEnum(enum.Enum):
"""
lane enum
Expand All @@ -43,6 +49,10 @@ class LaneEnum(enum.Enum):
TWO = "2"
THREE = "3"
FOUR = "4"
FIVE = "5"
SIX = "6"
SEVEN = "7"
EIGHT = "8"


class SequencingTypeEnum(enum.Enum):
Expand Down Expand Up @@ -90,6 +100,7 @@ class Base(DeclarativeBase):
# this is needed for the enum to work properly right now
# see https://github.com/sqlalchemy/sqlalchemy/discussions/8856
type_annotation_map = {
NucleicAcidTypeEnum: Enum(NucleicAcidTypeEnum),
LaneEnum: Enum(LaneEnum),
SequencingTypeEnum: Enum(SequencingTypeEnum),
StatusEnum: Enum(StatusEnum),
Expand Down Expand Up @@ -346,6 +357,7 @@ class Experiment(BaseTable):
id integer [PK]
sequencing_technology text
type text
nucleic_acid_type nucleic_acid_type
library_kit text
kit_expiration_date text
deprecated boolean
Expand All @@ -358,13 +370,22 @@ class Experiment(BaseTable):

sequencing_technology: Mapped[str] = mapped_column(default=None, nullable=True)
type: Mapped[str] = mapped_column(default=None, nullable=True)
nucleic_acid_type: Mapped[NucleicAcidTypeEnum] = mapped_column(default=None, nullable=False)
library_kit: Mapped[str] = mapped_column(default=None, nullable=True)
kit_expiration_date: Mapped[datetime] = mapped_column(default=None, nullable=True)

readsets: Mapped[list["Readset"]] = relationship(back_populates="experiment")

@classmethod
def from_attributes(cls, sequencing_technology=None, type=None, library_kit=None, kit_expiration_date=None, session=None):
def from_attributes(
cls,
nucleic_acid_type,
sequencing_technology=None,
type=None,
library_kit=None,
kit_expiration_date=None,
session=None
):
"""
get experiment if it exist, set it if it does not exist
"""
Expand All @@ -374,13 +395,15 @@ def from_attributes(cls, sequencing_technology=None, type=None, library_kit=None
select(cls)
.where(cls.sequencing_technology == sequencing_technology)
.where(cls.type == type)
.where(cls.nucleic_acid_type == nucleic_acid_type)
.where(cls.library_kit == library_kit)
.where(cls.kit_expiration_date == kit_expiration_date)
).first()
if not experiment:
experiment = cls(
sequencing_technology=sequencing_technology,
type=type,
nucleic_acid_type=nucleic_acid_type,
library_kit=library_kit,
kit_expiration_date=kit_expiration_date
)
Expand Down
2 changes: 2 additions & 0 deletions project_tracking/vocabulary.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

# patient table
PATIENT = "patient"
PATIENT_ID = "patient_id"
PATIENT_FMS_ID = "patient_fms_id"
PATIENT_NAME = "patient_name"
PATIENT_COHORT = "patient_cohort"
Expand All @@ -25,6 +26,7 @@
# experiment table
EXPERIMENT_SEQUENCING_TECHNOLOGY = "experiment_sequencing_technology"
EXPERIMENT_TYPE = "experiment_type"
EXPERIMENT_NUCLEIC_ACID_TYPE = "experiment_nucleic_acid_type"
EXPERIMENT_LIBRARY_KIT = "experiment_library_kit"
EXPERIMENT_KIT_EXPIRATION_DATE = "experiment_kit_expiration_date"
EXPERIMENT_TYPE_LIST = ["PCR-FREE", "RNASEQ"]
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def pre_filled_model():
project=project)

sequencing_technology = 'Fancy Buzzword'
exp = model.Experiment(sequencing_technology=sequencing_technology)
exp = model.Experiment(nucleic_acid_type=model.NucleicAcidTypeEnum.DNA)
pa_name = "P_O"
pa = model.Patient(name=pa_name, project=project)
sa_name = 'gros_bobo'
Expand Down
Loading

0 comments on commit d935efd

Please sign in to comment.