diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py index 4f3ee18..a163a82 100644 --- a/project_tracking/db_action.py +++ b/project_tracking/db_action.py @@ -53,13 +53,22 @@ def to_dict(self): return rv class DidNotFindError(Error): - """DidNotFind""" + """DidNotFindError""" def __init__(self, message=None, table=None, attribute=None, query=None): super().__init__(message) if message: self.message = message else: - self.message = f"{table} with {attribute} {query} doesn't exist on database" + self.message = f"'{table}' with '{attribute}' '{query}' doesn't exist on database" + +class RequestError(Error): + """RequestError""" + def __init__(self, message=None, argument=None): + super().__init__(message) + if message: + self.message = message + else: + self.message = f"For current request '{argument}' is required" def name_to_id(model_class, name, session=None): """ @@ -590,6 +599,7 @@ def ingest_run_processing(project_id: str, ingest_data, session=None): experiment = Experiment.from_attributes( sequencing_technology=readset_json[vb.EXPERIMENT_SEQUENCING_TECHNOLOGY], type=readset_json[vb.EXPERIMENT_TYPE], + nucleic_acid_type=readset_json[vb.EXPERIMENT_NUCLEIC_ACID_TYPE], library_kit=readset_json[vb.EXPERIMENT_LIBRARY_KIT], kit_expiration_date=kit_expiration_date, session=session @@ -752,6 +762,7 @@ def digest_readset_file(project_id: str, digest_data, session=None): if not session: session = database.get_session() + patients = [] samples = [] readsets = [] output = [] @@ -760,13 +771,59 @@ def digest_readset_file(project_id: str, digest_data, session=None): } location_endpoint = None - if vb.LOCATION_ENDPOINT in digest_data.keys(): location_endpoint = digest_data[vb.LOCATION_ENDPOINT] + if vb.EXPERIMENT_NUCLEIC_ACID_TYPE in digest_data.keys(): + nucleic_acid_type = digest_data[vb.EXPERIMENT_NUCLEIC_ACID_TYPE] + else: + raise RequestError(argument="experiment_nucleic_acid_type") + + if vb.PATIENT_NAME in digest_data.keys(): + for patient_name in digest_data[vb.PATIENT_NAME]: + patient = session.scalars( + select(Patient) + .where(Patient.name == patient_name) + .join(Patient.samples) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() + if patient: + patients.append(patient) + else: + raise DidNotFindError(table="Patient", attribute="name", query=patient_name) + if vb.PATIENT_ID in digest_data.keys(): + for patient_id in digest_data[vb.PATIENT_ID]: + # logger.debug(f"\n\n{patient_id}\n\n") + patient = session.scalars( + select(Patient) + .where(Patient.id == patient_id) + .join(Patient.samples) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() + if patient: + patients.append(patient) + else: + raise DidNotFindError(table="Patient", attribute="id", query=patient_id) + if patients: + set(patients) + for patient in patients: + for sample in patient.samples: + for readset in sample.readsets: + readsets.append(readset) + if vb.SAMPLE_NAME in digest_data.keys(): for sample_name in digest_data[vb.SAMPLE_NAME]: - sample = session.scalars(select(Sample).where(Sample.name == sample_name)).unique().first() + sample = session.scalars( + select(Sample) + .where(Sample.name == sample_name) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if sample: samples.append(sample) else: @@ -774,7 +831,13 @@ def digest_readset_file(project_id: str, digest_data, session=None): if vb.SAMPLE_ID in digest_data.keys(): for sample_id in digest_data[vb.SAMPLE_ID]: # logger.debug(f"\n\n{sample_id}\n\n") - sample = session.scalars(select(Sample).where(Sample.id == sample_id)).unique().first() + sample = session.scalars( + select(Sample) + .where(Sample.id == sample_id) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if sample: samples.append(sample) else: @@ -784,16 +847,27 @@ def digest_readset_file(project_id: str, digest_data, session=None): for sample in samples: for readset in sample.readsets: readsets.append(readset) + if vb.READSET_NAME in digest_data.keys(): for readset_name in digest_data[vb.READSET_NAME]: - readset = session.scalars(select(Readset).where(Readset.name == readset_name)).unique().first() + readset = session.scalars( + select(Readset) + .where(Readset.name == readset_name) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if readset: readsets.append(readset) else: raise DidNotFindError(table="Readset", attribute="name", query=readset_name) if vb.READSET_ID in digest_data.keys(): for readset_id in digest_data[vb.READSET_ID]: - readset = session.scalars(select(Readset).where(Readset.id == readset_id)).unique().first() + readset = session.scalars( + select(Readset) + .where(Readset.id == readset_id) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if readset: readsets.append(readset) else: @@ -802,7 +876,6 @@ def digest_readset_file(project_id: str, digest_data, session=None): set(readsets) for readset in readsets: readset_files = [] - logger.debug(f"\n\n{readset}\n\n") bed = None fastq1 = None fastq2 = None @@ -869,35 +942,93 @@ def digest_pair_file(project_id: str, digest_data, session=None): # readsets = [] output = [] + if vb.EXPERIMENT_NUCLEIC_ACID_TYPE in digest_data.keys(): + nucleic_acid_type = digest_data[vb.EXPERIMENT_NUCLEIC_ACID_TYPE] + else: + raise RequestError(argument="experiment_nucleic_acid_type") + + if vb.PATIENT_NAME in digest_data.keys(): + for patient_name in digest_data[vb.PATIENT_NAME]: + patient = session.scalars( + select(Patient) + .where(Patient.name == patient_name) + .join(Patient.samples) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() + if patient: + patients.append(patient) + else: + raise DidNotFindError(table="Patient", attribute="name", query=patient_name) + if vb.PATIENT_ID in digest_data.keys(): + for patient_id in digest_data[vb.PATIENT_ID]: + patient = session.scalars( + select(Patient) + .where(Patient.id == patient_id) + .join(Patient.samples) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() + if patient: + patients.append(patient) + else: + raise DidNotFindError(table="Patient", attribute="id", query=patient_id) + if patients: + set(patients) + for patient in patients: + for sample in patient.samples: + samples.append(sample) + if vb.SAMPLE_NAME in digest_data.keys(): for sample_name in digest_data[vb.SAMPLE_NAME]: - sample = session.scalars(select(Sample).where(Sample.name == sample_name)).unique().first() - # logger.info(f"\n\n{sample}\n\n") + sample = session.scalars( + select(Sample) + .where(Sample.name == sample_name) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if sample: samples.append(sample) else: raise DidNotFindError(table="Sample", attribute="name", query=sample_name) if vb.SAMPLE_ID in digest_data.keys(): for sample_id in digest_data[vb.SAMPLE_ID]: - sample = session.scalars(select(Sample).where(Sample.id == sample_id)).unique().first() + sample = session.scalars( + select(Sample) + .where(Sample.id == sample_id) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if sample: samples.append(sample) else: raise DidNotFindError(table="Sample", attribute="id", query=sample_id) if vb.READSET_NAME in digest_data.keys(): for readset_name in digest_data[vb.READSET_NAME]: - readset = session.scalars(select(Readset).where(Readset.name == readset_name)).unique().first() + readset = session.scalars( + select(Readset) + .where(Readset.name == readset_name) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if readset: samples.append(readset.sample) - # readsets.append(readset) else: raise DidNotFindError(table="Readset", attribute="name", query=readset_name) if vb.READSET_ID in digest_data.keys(): for readset_id in digest_data[vb.READSET_ID]: - readset = session.scalars(select(Readset).where(Readset.id == readset_id)).unique().first() + readset = session.scalars( + select(Readset) + .where(Readset.id == readset_id) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if readset: samples.append(readset.sample) - # readsets.append(readset) else: raise DidNotFindError(table="Readset", attribute="id", query=readset_id) if samples: @@ -1066,7 +1197,7 @@ def digest_unanalyzed(project_id: str, digest_data, session=None): run_name = digest_data["run_name"] if run_name: run_id = name_to_id("Run", run_name)[0] - experiment_sequencing_technology = digest_data["experiment_sequencing_technology"] + experiment_nucleic_acid_type = digest_data["experiment_nucleic_acid_type"] location_endpoint = digest_data["location_endpoint"] if sample_name_flag: @@ -1096,17 +1227,15 @@ def digest_unanalyzed(project_id: str, digest_data, session=None): stmt.where(Run.id == run_id) .join(Readset.run) ) - if experiment_sequencing_technology: + if experiment_nucleic_acid_type: stmt = ( - stmt.where(Experiment.sequencing_technology == experiment_sequencing_technology) + stmt.where(Experiment.nucleic_acid_type == experiment_nucleic_acid_type) .join(Readset.experiment) ) - # logger.debug(f"\n\n{stmt}\n\n") output = { "location_endpoint": location_endpoint, key: session.scalars(stmt).unique().all() } - # logger.debug(f"\n\n{session.scalars(stmt).unique().all()}\n\n") return json.dumps(output) diff --git a/project_tracking/model.py b/project_tracking/model.py index 2b74951..8ad1ee6 100644 --- a/project_tracking/model.py +++ b/project_tracking/model.py @@ -35,6 +35,12 @@ from . import database +class NucleicAcidTypeEnum(enum.Enum): + """nucleic_acid_type enum""" + DNA = "DNA" + RNA = "RNA" + + class LaneEnum(enum.Enum): """ lane enum @@ -43,6 +49,10 @@ class LaneEnum(enum.Enum): TWO = "2" THREE = "3" FOUR = "4" + FIVE = "5" + SIX = "6" + SEVEN = "7" + EIGHT = "8" class SequencingTypeEnum(enum.Enum): @@ -90,6 +100,7 @@ class Base(DeclarativeBase): # this is needed for the enum to work properly right now # see https://github.com/sqlalchemy/sqlalchemy/discussions/8856 type_annotation_map = { + NucleicAcidTypeEnum: Enum(NucleicAcidTypeEnum), LaneEnum: Enum(LaneEnum), SequencingTypeEnum: Enum(SequencingTypeEnum), StatusEnum: Enum(StatusEnum), @@ -346,6 +357,7 @@ class Experiment(BaseTable): id integer [PK] sequencing_technology text type text + nucleic_acid_type nucleic_acid_type library_kit text kit_expiration_date text deprecated boolean @@ -358,13 +370,22 @@ class Experiment(BaseTable): sequencing_technology: Mapped[str] = mapped_column(default=None, nullable=True) type: Mapped[str] = mapped_column(default=None, nullable=True) + nucleic_acid_type: Mapped[NucleicAcidTypeEnum] = mapped_column(default=None, nullable=False) library_kit: Mapped[str] = mapped_column(default=None, nullable=True) kit_expiration_date: Mapped[datetime] = mapped_column(default=None, nullable=True) readsets: Mapped[list["Readset"]] = relationship(back_populates="experiment") @classmethod - def from_attributes(cls, sequencing_technology=None, type=None, library_kit=None, kit_expiration_date=None, session=None): + def from_attributes( + cls, + nucleic_acid_type, + sequencing_technology=None, + type=None, + library_kit=None, + kit_expiration_date=None, + session=None + ): """ get experiment if it exist, set it if it does not exist """ @@ -374,6 +395,7 @@ def from_attributes(cls, sequencing_technology=None, type=None, library_kit=None select(cls) .where(cls.sequencing_technology == sequencing_technology) .where(cls.type == type) + .where(cls.nucleic_acid_type == nucleic_acid_type) .where(cls.library_kit == library_kit) .where(cls.kit_expiration_date == kit_expiration_date) ).first() @@ -381,6 +403,7 @@ def from_attributes(cls, sequencing_technology=None, type=None, library_kit=None experiment = cls( sequencing_technology=sequencing_technology, type=type, + nucleic_acid_type=nucleic_acid_type, library_kit=library_kit, kit_expiration_date=kit_expiration_date ) diff --git a/project_tracking/vocabulary.py b/project_tracking/vocabulary.py index 90d8cfc..28920d5 100644 --- a/project_tracking/vocabulary.py +++ b/project_tracking/vocabulary.py @@ -10,6 +10,7 @@ # patient table PATIENT = "patient" +PATIENT_ID = "patient_id" PATIENT_FMS_ID = "patient_fms_id" PATIENT_NAME = "patient_name" PATIENT_COHORT = "patient_cohort" @@ -25,6 +26,7 @@ # experiment table EXPERIMENT_SEQUENCING_TECHNOLOGY = "experiment_sequencing_technology" EXPERIMENT_TYPE = "experiment_type" +EXPERIMENT_NUCLEIC_ACID_TYPE = "experiment_nucleic_acid_type" EXPERIMENT_LIBRARY_KIT = "experiment_library_kit" EXPERIMENT_KIT_EXPIRATION_DATE = "experiment_kit_expiration_date" EXPERIMENT_TYPE_LIST = ["PCR-FREE", "RNASEQ"] diff --git a/tests/conftest.py b/tests/conftest.py index 5a9d125..99b407a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -28,7 +28,7 @@ def pre_filled_model(): project=project) sequencing_technology = 'Fancy Buzzword' - exp = model.Experiment(sequencing_technology=sequencing_technology) + exp = model.Experiment(nucleic_acid_type=model.NucleicAcidTypeEnum.DNA) pa_name = "P_O" pa = model.Patient(name=pa_name, project=project) sa_name = 'gros_bobo' diff --git a/tests/data/run_processing.json b/tests/data/run_processing.json index 4ff31b8..5e85ebd 100644 --- a/tests/data/run_processing.json +++ b/tests/data/run_processing.json @@ -21,6 +21,7 @@ { "experiment_sequencing_technology": null, "experiment_type": "PCR-free", + "experiment_nucleic_acid_type": "DNA", "experiment_library_kit": null, "experiment_kit_expiration_date": null, "readset_name": "MoHQ-JG-9-23-15000863775-19933DT.A01433_0157_1", @@ -73,6 +74,7 @@ { "experiment_sequencing_technology": null, "experiment_type": "PCR-free", + "experiment_nucleic_acid_type": "DNA", "experiment_library_kit": null, "experiment_kit_expiration_date": null, "readset_name": "MoHQ-JG-9-23-15000936286-19866DN.A01433_0157_2", @@ -133,6 +135,7 @@ { "experiment_sequencing_technology": null, "experiment_type": "RNASeq", + "experiment_nucleic_acid_type": "RNA", "experiment_library_kit": null, "experiment_kit_expiration_date": null, "readset_name": "MoHQ-CM-1-3-6929-1RT.A01433_0157_3", @@ -191,6 +194,7 @@ { "experiment_sequencing_technology": null, "experiment_type": "PCR-free", + "experiment_nucleic_acid_type": "DNA", "experiment_library_kit": null, "experiment_kit_expiration_date": null, "readset_name": "MoHQ-CM-1-3-15000863775-19933DT.A01433_0157_1", @@ -243,6 +247,7 @@ { "experiment_sequencing_technology": null, "experiment_type": "PCR-free", + "experiment_nucleic_acid_type": "DNA", "experiment_library_kit": null, "experiment_kit_expiration_date": null, "readset_name": "MoHQ-CM-1-3-15000936286-19866DN.A01433_0157_2", diff --git a/tests/test_serialization.py b/tests/test_serialization.py index fa75b15..473c8c9 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -8,7 +8,6 @@ def test_serialization(not_app_db): op_config_version = 0.1 op_config_name = 'generic_index' op_name = 'ingest' - sequencing_technology = 'Fancy Buzzword' pa_name = "P_O" sa_name = 'gros_bobo' ru_name = "cure the Conglomerat old director's partner 01" @@ -28,7 +27,7 @@ def test_serialization(not_app_db): operation_config=op_c, project=project) - exp = model.Experiment(sequencing_technology=sequencing_technology) + exp = model.Experiment(nucleic_acid_type=model.NucleicAcidTypeEnum.DNA) pa = model.Patient(name=pa_name, project=project) sa = model.Sample(name=sa_name, patient=pa) ru = model.Run(instrument=instrument, name=ru_name)