From a1d4448ddb1888bab7c9bde6270d4b5025979a7e Mon Sep 17 00:00:00 2001 From: Paul STRETENOWICH <31796146+paulstretenowich@users.noreply.github.com> Date: Mon, 15 Jan 2024 11:53:28 -0500 Subject: [PATCH] Dev (#23) undefined --- .github/workflows/run_test.yml | 38 ++++- Dockerfile => Containerfile | 3 +- project_tracking/api/project.py | 83 ++++----- project_tracking/db_action.py | 290 ++++++++++++++++++++++++-------- project_tracking/model.py | 25 ++- project_tracking/vocabulary.py | 2 + tests/conftest.py | 2 +- tests/data/run_processing.json | 5 + tests/test_serialization.py | 3 +- 9 files changed, 325 insertions(+), 126 deletions(-) rename Dockerfile => Containerfile (85%) diff --git a/.github/workflows/run_test.yml b/.github/workflows/run_test.yml index 0222484..14ad43c 100644 --- a/.github/workflows/run_test.yml +++ b/.github/workflows/run_test.yml @@ -11,9 +11,17 @@ on: - 'main' - 'dev' -jobs: - build: + tags: + - '[0-9]+.[0-9]+.[0-9]+' + +env: + REGISTRY_USER: c3genomics+github_pusher + IMAGE_REGISTRY: quay.io + REGISTRY_PASSWORD: ${{ secrets.QUAY_ROBOT_TOKEN }} + IMAGE: c3genomics/project_tracking +jobs: + test: runs-on: ${{ matrix.os }} strategy: matrix: @@ -41,3 +49,29 @@ jobs: - name: Test with pytest run: | pytest -v + build: + needs: test + if: startsWith(github.ref, 'refs/tags') + name: Build image + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: set tag + run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV + - name: Buildah Action + uses: redhat-actions/buildah-build@v2 + with: + image: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE }} + tags: ${{ env.RELEASE_VERSION }} latest_release + containerfiles: ./Containerfile + - name: Push to repo + uses: redhat-actions/push-to-registry@v2 + with: + username: ${{ env.REGISTRY_USER }} + password: ${{ env.REGISTRY_PASSWORD }} + registry: ${{ env.IMAGE_REGISTRY }} + image: ${{ env.IMAGE }} + tags: ${{ env.RELEASE_VERSION }} latest_release + - name: Print image url + run: echo "Image pushed to ${{ steps.push-to-repo.outputs.registry-paths }}" + diff --git a/Dockerfile b/Containerfile similarity index 85% rename from Dockerfile rename to Containerfile index 5a421ca..55e7aaf 100644 --- a/Dockerfile +++ b/Containerfile @@ -1,4 +1,5 @@ -FROM fedora:36 +FROM fedora:39 +MAINTAINER P-O Quirion po.quirion@mcgill.ca ENV APP=project_tracking RUN mkdir /app /sqlite diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py index c9ceb47..4edd7b1 100644 --- a/project_tracking/api/project.py +++ b/project_tracking/api/project.py @@ -64,7 +64,8 @@ def sanity_check(item, action_output): @convcheck_project def projects(project_id: str = None): """ - project: uses the form "/project/1" for project ID and "/project/name" for project name + GET: + project: uses the form "/project/1" for project ID and "/project/name" for project name return: list of all the details of the poject with name "project_name" or ID "project_id" """ @@ -81,8 +82,9 @@ def projects(project_id: str = None): @convcheck_project def patients(project_id: str, patient_id: str = None): """ - patient_id: uses the form "1,3-8,9" - return: list all patient or selected patient that are also par of + GET: + patient_id: uses the form "1,3-8,9", if not provided all patients are returned + return: list all patients or selected patients, belonging to Query: (pair, tumor): Default (None, true) @@ -93,7 +95,7 @@ def patients(project_id: str, patient_id: str = None): Return: a subset of patient who have Tumor=False & Tumor=True samples (false, true): return: a subset of patient who only have Tumor=True samples - (false, true): + (false, false): return: a subset of patient who only have Tumor=false samples """ @@ -144,8 +146,9 @@ def patients(project_id: str, patient_id: str = None): @convcheck_project def samples(project_id: str, sample_id: str = None): """ - sample_id: uses the form "1,3-8,9", if not provides, all sample are returned - return: all or selected sample that are in sample_id and part of project + GET: + sample_id: uses the form "1,3-8,9", if not provided all samples are returned + return: list all patients or selected samples, belonging to """ query = request.args @@ -174,8 +177,9 @@ def samples(project_id: str, sample_id: str = None): @convcheck_project def readsets(project_id: str, readset_id: str=None): """ - readset_id: uses the form "1,3-8,9", if not provided, all readsets are returned - return: selected readsets that are in sample_id and part of project + GET: + readset_id: uses the form "1,3-8,9", if not provided all readsets are returned + return: list all patients or selected readsets, belonging to """ query = request.args @@ -207,12 +211,12 @@ def readsets(project_id: str, readset_id: str=None): @convcheck_project def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id: str=None, file_id: str=None): """ - file_id: uses the form "1,3-8,9". Select file by ids - patient_id: uses the form "1,3-8,9". Select file by patient ids - sample_id: uses the form "1,3-8,9". Select file by sample ids - redeaset_id: uses the form "1,3-8,9". Select file by readset ids - - return: selected files + GET: + file_id: uses the form "1,3-8,9". Select file by ids + patient_id: uses the form "1,3-8,9". Select file by patient ids + sample_id: uses the form "1,3-8,9". Select file by sample ids + redeaset_id: uses the form "1,3-8,9". Select file by readset ids + return: selected files, belonging to Query: (deliverable): Default (None) @@ -276,19 +280,19 @@ def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id @convcheck_project def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_id: str=None, metric_id: str=None): """ - metric_id: uses the form "1,3-8,9". Select metric by ids - patient_id: uses the form "1,3-8,9". Select metric by patient ids - sample_id: uses the form "1,3-8,9". Select metric by sample ids - redeaset_id: uses the form "1,3-8,9". Select metric by readset ids - - We also accespt POST data with comma separeted list + GET: + metric_id: uses the form "1,3-8,9". Select metric by ids + patient_id: uses the form "1,3-8,9". Select metric by patient ids + sample_id: uses the form "1,3-8,9". Select metric by sample ids + redeaset_id: uses the form "1,3-8,9". Select metric by readset ids + return: selected metrics, belonging to + + We also accept POST data with comma separeted list metric_name = [,NAME] [...] readset_name = [,NAME] [...] sample_name = [,NAME] [...] patient_name = [,NAME] [...] - return: selected metrics - Query: (deliverable): Default (None) The deliverable query allows to get all metrics labelled as deliverable @@ -360,8 +364,9 @@ def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_ @convcheck_project def readsets_from_samples(project_id: str, sample_id: str): """ - sample_id: uses the form "1,3-8,9" - return: readsets for selected sample_id + GET: + sample_id: uses the form "1,3-8,9" + return: selected readsets belonging to """ query = request.args @@ -382,14 +387,14 @@ def readsets_from_samples(project_id: str, sample_id: str): action_output = db_action.readsets(project_id, sample_id) - return sanity_check("Metric", action_output) + return sanity_check("Readset", action_output) @bp.route('//digest_readset_file', methods=['POST']) @convcheck_project def digest_readset_file(project_id: str): """ - POST: list of Readset/Sample Name or id + POST: json holding the list of Patient/Sample/Readset Name or id AND location endpoint + experiment nucleic_acid_type return: all information to create a "Genpipes readset file" """ @@ -410,7 +415,7 @@ def digest_readset_file(project_id: str): @convcheck_project def digest_pair_file(project_id: str): """ - POST: list of Readset/Sample Name or id + POST: json holding the list of Patient/Sample/Readset Name or id AND location endpoint + experiment nucleic_acid_type return: all information to create a "Genpipes pair file" """ @@ -427,7 +432,7 @@ def digest_pair_file(project_id: str): return db_action.digest_pair_file(project_id=project_id, digest_data=ingest_data) -@bp.route('//ingest_run_processing', methods=['GET', 'POST']) +@bp.route('//ingest_run_processing', methods=['POST']) @convcheck_project def ingest_run_processing(project_id: str): """ @@ -435,13 +440,6 @@ def ingest_run_processing(project_id: str): return: The Operation object """ - # Is this if required? - if request.method == 'GET': - return abort( - 405, - "Use post method to ingest runs" - ) - if request.method == 'POST': try: ingest_data = request.get_json(force=True) @@ -480,21 +478,14 @@ def ingest_transfer(project_id: str): return [i.flat_dict for i in db_action.ingest_transfer(project_id=project_id, ingest_data=ingest_data)] -@bp.route('//ingest_genpipes', methods=['GET', 'POST']) +@bp.route('//ingest_genpipes', methods=['POST']) @convcheck_project def ingest_genpipes(project_id: str): """ - POST: json describing genpipes + POST: json describing genpipes analysis return: The Operation object and Jobs associated """ - # Is this if required? - if request.method == 'GET': - return abort( - 405, - "Use post method to ingest genpipes analysis" - ) - if request.method == 'POST': try: ingest_data = request.get_json(force=True) @@ -520,8 +511,8 @@ def ingest_genpipes(project_id: str): @convcheck_project def digest_unanalyzed(project_id: str): """ - POST: list of Readset/Sample Name or id - return: Readsets or Samples unanalyzed + POST: json holding the list of Sample/Readset Name or id AND location endpoint + experiment nucleic_acid_type + return: Samples/Readsets unanalyzed with location endpoint + experiment nucleic_acid_type """ if request.method == 'POST': try: diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py index 4f3ee18..261494e 100644 --- a/project_tracking/db_action.py +++ b/project_tracking/db_action.py @@ -53,13 +53,22 @@ def to_dict(self): return rv class DidNotFindError(Error): - """DidNotFind""" + """DidNotFindError""" def __init__(self, message=None, table=None, attribute=None, query=None): super().__init__(message) if message: self.message = message else: - self.message = f"{table} with {attribute} {query} doesn't exist on database" + self.message = f"'{table}' with '{attribute}' '{query}' doesn't exist on database" + +class RequestError(Error): + """RequestError""" + def __init__(self, message=None, argument=None): + super().__init__(message) + if message: + self.message = message + else: + self.message = f"For current request '{argument}' is required" def name_to_id(model_class, name, session=None): """ @@ -590,6 +599,7 @@ def ingest_run_processing(project_id: str, ingest_data, session=None): experiment = Experiment.from_attributes( sequencing_technology=readset_json[vb.EXPERIMENT_SEQUENCING_TECHNOLOGY], type=readset_json[vb.EXPERIMENT_TYPE], + nucleic_acid_type=readset_json[vb.EXPERIMENT_NUCLEIC_ACID_TYPE], library_kit=readset_json[vb.EXPERIMENT_LIBRARY_KIT], kit_expiration_date=kit_expiration_date, session=session @@ -641,10 +651,14 @@ def ingest_run_processing(project_id: str, ingest_data, session=None): metric_deliverable = metric_json[vb.METRIC_DELIVERABLE] else: metric_deliverable = False + if vb.METRIC_FLAG in metric_json: + metric_flag = FlagEnum(metric_json[vb.METRIC_FLAG]) + else: + metric_flag = None Metric( name=metric_json[vb.METRIC_NAME], value=metric_json[vb.METRIC_VALUE], - flag=FlagEnum(metric_json[vb.METRIC_FLAG]), + flag=metric_flag, deliverable=metric_deliverable, job=job, readsets=[readset] @@ -752,6 +766,7 @@ def digest_readset_file(project_id: str, digest_data, session=None): if not session: session = database.get_session() + patients = [] samples = [] readsets = [] output = [] @@ -760,49 +775,111 @@ def digest_readset_file(project_id: str, digest_data, session=None): } location_endpoint = None - if vb.LOCATION_ENDPOINT in digest_data.keys(): location_endpoint = digest_data[vb.LOCATION_ENDPOINT] + if vb.EXPERIMENT_NUCLEIC_ACID_TYPE in digest_data.keys(): + nucleic_acid_type = digest_data[vb.EXPERIMENT_NUCLEIC_ACID_TYPE] + else: + raise RequestError(argument="experiment_nucleic_acid_type") + + if vb.PATIENT_NAME in digest_data.keys(): + for patient_name in digest_data[vb.PATIENT_NAME]: + patient = session.scalars( + select(Patient) + .where(Patient.name == patient_name) + .join(Patient.samples) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() + if patient: + patients.append(patient) + else: + raise DidNotFindError(f"'Patient' with 'name' '{patient_name}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database") + if vb.PATIENT_ID in digest_data.keys(): + for patient_id in digest_data[vb.PATIENT_ID]: + # logger.debug(f"\n\n{patient_id}\n\n") + patient = session.scalars( + select(Patient) + .where(Patient.id == patient_id) + .join(Patient.samples) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() + if patient: + patients.append(patient) + else: + raise DidNotFindError(f"'Patient' with 'id' '{patient_id}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database") + if patients: + set(patients) + for patient in patients: + for sample in patient.samples: + for readset in sample.readsets: + readsets.append(readset) + if vb.SAMPLE_NAME in digest_data.keys(): for sample_name in digest_data[vb.SAMPLE_NAME]: - sample = session.scalars(select(Sample).where(Sample.name == sample_name)).unique().first() + sample = session.scalars( + select(Sample) + .where(Sample.name == sample_name) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if sample: samples.append(sample) else: - raise DidNotFindError(table="Sample", attribute="name", query=sample_name) + raise DidNotFindError(f"'Sample' with 'name' '{patient_name}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database") if vb.SAMPLE_ID in digest_data.keys(): for sample_id in digest_data[vb.SAMPLE_ID]: # logger.debug(f"\n\n{sample_id}\n\n") - sample = session.scalars(select(Sample).where(Sample.id == sample_id)).unique().first() + sample = session.scalars( + select(Sample) + .where(Sample.id == sample_id) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if sample: samples.append(sample) else: - raise DidNotFindError(table="Sample", attribute="id", query=sample_id) + raise DidNotFindError(f"'Sample' with 'id' '{patient_name}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database") if samples: set(samples) for sample in samples: for readset in sample.readsets: readsets.append(readset) + if vb.READSET_NAME in digest_data.keys(): for readset_name in digest_data[vb.READSET_NAME]: - readset = session.scalars(select(Readset).where(Readset.name == readset_name)).unique().first() + readset = session.scalars( + select(Readset) + .where(Readset.name == readset_name) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if readset: readsets.append(readset) else: - raise DidNotFindError(table="Readset", attribute="name", query=readset_name) + raise DidNotFindError(f"'Readset' with 'name' '{patient_name}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database") if vb.READSET_ID in digest_data.keys(): for readset_id in digest_data[vb.READSET_ID]: - readset = session.scalars(select(Readset).where(Readset.id == readset_id)).unique().first() + readset = session.scalars( + select(Readset) + .where(Readset.id == readset_id) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if readset: readsets.append(readset) else: - raise DidNotFindError(table="Readset", attribute="id", query=readset_id) + raise DidNotFindError(f"'Readset' with 'id' '{patient_name}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database") if readsets: set(readsets) for readset in readsets: readset_files = [] - logger.debug(f"\n\n{readset}\n\n") bed = None fastq1 = None fastq2 = None @@ -866,38 +943,97 @@ def digest_pair_file(project_id: str, digest_data, session=None): pair_dict = {} samples = [] + patients = [] # readsets = [] output = [] + if vb.EXPERIMENT_NUCLEIC_ACID_TYPE in digest_data.keys(): + nucleic_acid_type = digest_data[vb.EXPERIMENT_NUCLEIC_ACID_TYPE] + else: + raise RequestError(argument="experiment_nucleic_acid_type") + + if vb.PATIENT_NAME in digest_data.keys(): + for patient_name in digest_data[vb.PATIENT_NAME]: + patient = session.scalars( + select(Patient) + .where(Patient.name == patient_name) + .join(Patient.samples) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() + if patient: + patients.append(patient) + else: + raise DidNotFindError(table="Patient", attribute="name", query=patient_name) + if vb.PATIENT_ID in digest_data.keys(): + for patient_id in digest_data[vb.PATIENT_ID]: + patient = session.scalars( + select(Patient) + .where(Patient.id == patient_id) + .join(Patient.samples) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() + if patient: + patients.append(patient) + else: + raise DidNotFindError(table="Patient", attribute="id", query=patient_id) + if patients: + set(patients) + for patient in patients: + for sample in patient.samples: + samples.append(sample) + if vb.SAMPLE_NAME in digest_data.keys(): for sample_name in digest_data[vb.SAMPLE_NAME]: - sample = session.scalars(select(Sample).where(Sample.name == sample_name)).unique().first() - # logger.info(f"\n\n{sample}\n\n") + sample = session.scalars( + select(Sample) + .where(Sample.name == sample_name) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if sample: samples.append(sample) else: raise DidNotFindError(table="Sample", attribute="name", query=sample_name) if vb.SAMPLE_ID in digest_data.keys(): for sample_id in digest_data[vb.SAMPLE_ID]: - sample = session.scalars(select(Sample).where(Sample.id == sample_id)).unique().first() + sample = session.scalars( + select(Sample) + .where(Sample.id == sample_id) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if sample: samples.append(sample) else: raise DidNotFindError(table="Sample", attribute="id", query=sample_id) if vb.READSET_NAME in digest_data.keys(): for readset_name in digest_data[vb.READSET_NAME]: - readset = session.scalars(select(Readset).where(Readset.name == readset_name)).unique().first() + readset = session.scalars( + select(Readset) + .where(Readset.name == readset_name) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if readset: samples.append(readset.sample) - # readsets.append(readset) else: raise DidNotFindError(table="Readset", attribute="name", query=readset_name) if vb.READSET_ID in digest_data.keys(): for readset_id in digest_data[vb.READSET_ID]: - readset = session.scalars(select(Readset).where(Readset.id == readset_id)).unique().first() + readset = session.scalars( + select(Readset) + .where(Readset.id == readset_id) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if readset: samples.append(readset.sample) - # readsets.append(readset) else: raise DidNotFindError(table="Readset", attribute="id", query=readset_id) if samples: @@ -977,55 +1113,64 @@ def ingest_genpipes(project_id: str, ingest_data, session=None): job_stop = datetime.strptime(job_json[vb.JOB_STOP], vb.DATE_LONG_FMT) except TypeError: job_stop = None - job = Job( - name=job_json[vb.JOB_NAME], - status=StatusEnum(job_json[vb.JOB_STATUS]), - start=job_start, - stop=job_stop, - operation=operation - ) - for file_json in job_json[vb.FILE]: - suffixes = Path(file_json[vb.FILE_NAME]).suffixes - file_type = os.path.splitext(file_json[vb.FILE_NAME])[-1][1:] - if ".gz" in suffixes: - file_type = "".join(suffixes[-2:]) - if vb.FILE_DELIVERABLE in file_json: - file_deliverable = file_json[vb.FILE_DELIVERABLE] - else: - file_deliverable = False - # Need to have an the following otherwise assigning extra_metadata to None converts null into json in the db - if vb.FILE_EXTRA_METADATA in file_json.keys(): - file = File( - name=file_json[vb.FILE_NAME], - type=file_type, - extra_metadata=file_json[vb.FILE_EXTRA_METADATA], - deliverable=file_deliverable, - readsets=[readset], - jobs=[job] - ) - else: - file = File( - name=file_json[vb.FILE_NAME], - type=file_type, - deliverable=file_deliverable, - readsets=[readset], - jobs=[job] - ) - location = Location.from_uri(uri=file_json[vb.LOCATION_URI], file=file, session=session) - if vb.METRIC in job_json.keys(): - for metric_json in job_json[vb.METRIC]: - if vb.METRIC_DELIVERABLE in metric_json: - metric_deliverable = metric_json[vb.METRIC_DELIVERABLE] + # Check if job_status exists otherwise skip it + if job_json[vb.JOB_STATUS]: + job = Job( + name=job_json[vb.JOB_NAME], + status=StatusEnum(job_json[vb.JOB_STATUS]), + start=job_start, + stop=job_stop, + operation=operation + ) + for file_json in job_json[vb.FILE]: + suffixes = Path(file_json[vb.FILE_NAME]).suffixes + file_type = os.path.splitext(file_json[vb.FILE_NAME])[-1][1:] + if ".gz" in suffixes: + file_type = "".join(suffixes[-2:]) + if vb.FILE_DELIVERABLE in file_json: + file_deliverable = file_json[vb.FILE_DELIVERABLE] else: - metric_deliverable = False - Metric( - name=metric_json[vb.METRIC_NAME], - value=metric_json[vb.METRIC_VALUE], - flag=FlagEnum(metric_json[vb.METRIC_FLAG]), - deliverable=metric_deliverable, - job=job, - readsets=[readset] - ) + file_deliverable = False + # Need to have an the following otherwise assigning extra_metadata to None converts null into json in the db + if vb.FILE_EXTRA_METADATA in file_json.keys(): + file = File( + name=file_json[vb.FILE_NAME], + type=file_type, + extra_metadata=file_json[vb.FILE_EXTRA_METADATA], + deliverable=file_deliverable, + readsets=[readset], + jobs=[job] + ) + else: + file = File( + name=file_json[vb.FILE_NAME], + type=file_type, + deliverable=file_deliverable, + readsets=[readset], + jobs=[job] + ) + location = Location.from_uri(uri=file_json[vb.LOCATION_URI], file=file, session=session) + if vb.METRIC in job_json.keys(): + for metric_json in job_json[vb.METRIC]: + if vb.METRIC_DELIVERABLE in metric_json: + metric_deliverable = metric_json[vb.METRIC_DELIVERABLE] + else: + metric_deliverable = False + if vb.METRIC_FLAG in metric_json: + metric_flag = FlagEnum(metric_json[vb.METRIC_FLAG]) + else: + metric_flag = None + Metric( + name=metric_json[vb.METRIC_NAME], + value=metric_json[vb.METRIC_VALUE], + flag=metric_flag, + deliverable=metric_deliverable, + job=job, + readsets=[readset] + ) + # If job status is null then skip it as we don't want to ingest data not generated + else: + pass session.add(job) session.flush() @@ -1066,7 +1211,7 @@ def digest_unanalyzed(project_id: str, digest_data, session=None): run_name = digest_data["run_name"] if run_name: run_id = name_to_id("Run", run_name)[0] - experiment_sequencing_technology = digest_data["experiment_sequencing_technology"] + experiment_nucleic_acid_type = digest_data["experiment_nucleic_acid_type"] location_endpoint = digest_data["location_endpoint"] if sample_name_flag: @@ -1096,17 +1241,16 @@ def digest_unanalyzed(project_id: str, digest_data, session=None): stmt.where(Run.id == run_id) .join(Readset.run) ) - if experiment_sequencing_technology: + if experiment_nucleic_acid_type: stmt = ( - stmt.where(Experiment.sequencing_technology == experiment_sequencing_technology) + stmt.where(Experiment.nucleic_acid_type == experiment_nucleic_acid_type) .join(Readset.experiment) ) - # logger.debug(f"\n\n{stmt}\n\n") output = { "location_endpoint": location_endpoint, + "experiment_nucleic_acid_type": experiment_nucleic_acid_type, key: session.scalars(stmt).unique().all() } - # logger.debug(f"\n\n{session.scalars(stmt).unique().all()}\n\n") return json.dumps(output) diff --git a/project_tracking/model.py b/project_tracking/model.py index 2b74951..8ad1ee6 100644 --- a/project_tracking/model.py +++ b/project_tracking/model.py @@ -35,6 +35,12 @@ from . import database +class NucleicAcidTypeEnum(enum.Enum): + """nucleic_acid_type enum""" + DNA = "DNA" + RNA = "RNA" + + class LaneEnum(enum.Enum): """ lane enum @@ -43,6 +49,10 @@ class LaneEnum(enum.Enum): TWO = "2" THREE = "3" FOUR = "4" + FIVE = "5" + SIX = "6" + SEVEN = "7" + EIGHT = "8" class SequencingTypeEnum(enum.Enum): @@ -90,6 +100,7 @@ class Base(DeclarativeBase): # this is needed for the enum to work properly right now # see https://github.com/sqlalchemy/sqlalchemy/discussions/8856 type_annotation_map = { + NucleicAcidTypeEnum: Enum(NucleicAcidTypeEnum), LaneEnum: Enum(LaneEnum), SequencingTypeEnum: Enum(SequencingTypeEnum), StatusEnum: Enum(StatusEnum), @@ -346,6 +357,7 @@ class Experiment(BaseTable): id integer [PK] sequencing_technology text type text + nucleic_acid_type nucleic_acid_type library_kit text kit_expiration_date text deprecated boolean @@ -358,13 +370,22 @@ class Experiment(BaseTable): sequencing_technology: Mapped[str] = mapped_column(default=None, nullable=True) type: Mapped[str] = mapped_column(default=None, nullable=True) + nucleic_acid_type: Mapped[NucleicAcidTypeEnum] = mapped_column(default=None, nullable=False) library_kit: Mapped[str] = mapped_column(default=None, nullable=True) kit_expiration_date: Mapped[datetime] = mapped_column(default=None, nullable=True) readsets: Mapped[list["Readset"]] = relationship(back_populates="experiment") @classmethod - def from_attributes(cls, sequencing_technology=None, type=None, library_kit=None, kit_expiration_date=None, session=None): + def from_attributes( + cls, + nucleic_acid_type, + sequencing_technology=None, + type=None, + library_kit=None, + kit_expiration_date=None, + session=None + ): """ get experiment if it exist, set it if it does not exist """ @@ -374,6 +395,7 @@ def from_attributes(cls, sequencing_technology=None, type=None, library_kit=None select(cls) .where(cls.sequencing_technology == sequencing_technology) .where(cls.type == type) + .where(cls.nucleic_acid_type == nucleic_acid_type) .where(cls.library_kit == library_kit) .where(cls.kit_expiration_date == kit_expiration_date) ).first() @@ -381,6 +403,7 @@ def from_attributes(cls, sequencing_technology=None, type=None, library_kit=None experiment = cls( sequencing_technology=sequencing_technology, type=type, + nucleic_acid_type=nucleic_acid_type, library_kit=library_kit, kit_expiration_date=kit_expiration_date ) diff --git a/project_tracking/vocabulary.py b/project_tracking/vocabulary.py index 90d8cfc..28920d5 100644 --- a/project_tracking/vocabulary.py +++ b/project_tracking/vocabulary.py @@ -10,6 +10,7 @@ # patient table PATIENT = "patient" +PATIENT_ID = "patient_id" PATIENT_FMS_ID = "patient_fms_id" PATIENT_NAME = "patient_name" PATIENT_COHORT = "patient_cohort" @@ -25,6 +26,7 @@ # experiment table EXPERIMENT_SEQUENCING_TECHNOLOGY = "experiment_sequencing_technology" EXPERIMENT_TYPE = "experiment_type" +EXPERIMENT_NUCLEIC_ACID_TYPE = "experiment_nucleic_acid_type" EXPERIMENT_LIBRARY_KIT = "experiment_library_kit" EXPERIMENT_KIT_EXPIRATION_DATE = "experiment_kit_expiration_date" EXPERIMENT_TYPE_LIST = ["PCR-FREE", "RNASEQ"] diff --git a/tests/conftest.py b/tests/conftest.py index 5a9d125..99b407a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -28,7 +28,7 @@ def pre_filled_model(): project=project) sequencing_technology = 'Fancy Buzzword' - exp = model.Experiment(sequencing_technology=sequencing_technology) + exp = model.Experiment(nucleic_acid_type=model.NucleicAcidTypeEnum.DNA) pa_name = "P_O" pa = model.Patient(name=pa_name, project=project) sa_name = 'gros_bobo' diff --git a/tests/data/run_processing.json b/tests/data/run_processing.json index 4ff31b8..5e85ebd 100644 --- a/tests/data/run_processing.json +++ b/tests/data/run_processing.json @@ -21,6 +21,7 @@ { "experiment_sequencing_technology": null, "experiment_type": "PCR-free", + "experiment_nucleic_acid_type": "DNA", "experiment_library_kit": null, "experiment_kit_expiration_date": null, "readset_name": "MoHQ-JG-9-23-15000863775-19933DT.A01433_0157_1", @@ -73,6 +74,7 @@ { "experiment_sequencing_technology": null, "experiment_type": "PCR-free", + "experiment_nucleic_acid_type": "DNA", "experiment_library_kit": null, "experiment_kit_expiration_date": null, "readset_name": "MoHQ-JG-9-23-15000936286-19866DN.A01433_0157_2", @@ -133,6 +135,7 @@ { "experiment_sequencing_technology": null, "experiment_type": "RNASeq", + "experiment_nucleic_acid_type": "RNA", "experiment_library_kit": null, "experiment_kit_expiration_date": null, "readset_name": "MoHQ-CM-1-3-6929-1RT.A01433_0157_3", @@ -191,6 +194,7 @@ { "experiment_sequencing_technology": null, "experiment_type": "PCR-free", + "experiment_nucleic_acid_type": "DNA", "experiment_library_kit": null, "experiment_kit_expiration_date": null, "readset_name": "MoHQ-CM-1-3-15000863775-19933DT.A01433_0157_1", @@ -243,6 +247,7 @@ { "experiment_sequencing_technology": null, "experiment_type": "PCR-free", + "experiment_nucleic_acid_type": "DNA", "experiment_library_kit": null, "experiment_kit_expiration_date": null, "readset_name": "MoHQ-CM-1-3-15000936286-19866DN.A01433_0157_2", diff --git a/tests/test_serialization.py b/tests/test_serialization.py index fa75b15..473c8c9 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -8,7 +8,6 @@ def test_serialization(not_app_db): op_config_version = 0.1 op_config_name = 'generic_index' op_name = 'ingest' - sequencing_technology = 'Fancy Buzzword' pa_name = "P_O" sa_name = 'gros_bobo' ru_name = "cure the Conglomerat old director's partner 01" @@ -28,7 +27,7 @@ def test_serialization(not_app_db): operation_config=op_c, project=project) - exp = model.Experiment(sequencing_technology=sequencing_technology) + exp = model.Experiment(nucleic_acid_type=model.NucleicAcidTypeEnum.DNA) pa = model.Patient(name=pa_name, project=project) sa = model.Sample(name=sa_name, patient=pa) ru = model.Run(instrument=instrument, name=ru_name)