From a1d4448ddb1888bab7c9bde6270d4b5025979a7e Mon Sep 17 00:00:00 2001
From: Paul STRETENOWICH <31796146+paulstretenowich@users.noreply.github.com>
Date: Mon, 15 Jan 2024 11:53:28 -0500
Subject: [PATCH] Dev (#23)

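Add a required nucleic_acid_type to experiments, let the readset/pair file
digests select by patient, make the metric flag optional, skip Genpipes jobs
that have no status, and build/push the container image on release tags.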
---
 .github/workflows/run_test.yml  |  38 ++++-
 Dockerfile => Containerfile     |   3 +-
 project_tracking/api/project.py |  83 ++++-----
 project_tracking/db_action.py   | 290 ++++++++++++++++++++++++--------
 project_tracking/model.py       |  25 ++-
 project_tracking/vocabulary.py  |   2 +
 tests/conftest.py               |   2 +-
 tests/data/run_processing.json  |   5 +
 tests/test_serialization.py     |   3 +-
 9 files changed, 325 insertions(+), 126 deletions(-)
 rename Dockerfile => Containerfile (85%)
diff --git a/.github/workflows/run_test.yml b/.github/workflows/run_test.yml
index 0222484..14ad43c 100644
--- a/.github/workflows/run_test.yml
+++ b/.github/workflows/run_test.yml
@@ -11,9 +11,17 @@ on:
       - 'main'
       - 'dev'
 
-jobs:
-  build:
+    tags:
+      - '[0-9]+.[0-9]+.[0-9]+'
+
+env:
+  REGISTRY_USER: c3genomics+github_pusher
+  IMAGE_REGISTRY: quay.io
+  REGISTRY_PASSWORD: ${{ secrets.QUAY_ROBOT_TOKEN }}
+  IMAGE: c3genomics/project_tracking
 
+jobs:
+  test:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
@@ -41,3 +49,29 @@ jobs:
       - name: Test with pytest
         run: |
           pytest -v
+  build:
+    needs: test
+    if: startsWith(github.ref, 'refs/tags')
+    name: Build image
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v3
+    - name: set tag
+      run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV
+    - name: Buildah Action
+      uses: redhat-actions/buildah-build@v2
+      with:
+        image: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE }}
+        tags: ${{ env.RELEASE_VERSION }} latest_release
+        containerfiles: ./Containerfile
+    - name: Push to repo
+      id: push-to-repo  # required so the "Print image url" step can read this step's outputs
+      uses: redhat-actions/push-to-registry@v2
+      with:
+        username: ${{ env.REGISTRY_USER }}
+        password: ${{ env.REGISTRY_PASSWORD }}
+        registry: ${{ env.IMAGE_REGISTRY }}
+        image: ${{ env.IMAGE }}
+        tags: ${{ env.RELEASE_VERSION }} latest_release
+    - name: Print image url
+      run: echo "Image pushed to ${{ steps.push-to-repo.outputs.registry-paths }}"
diff --git a/Dockerfile b/Containerfile
similarity index 85%
rename from Dockerfile
rename to Containerfile
index 5a421ca..55e7aaf 100644
--- a/Dockerfile
+++ b/Containerfile
@@ -1,4 +1,5 @@
-FROM fedora:36
+FROM fedora:39
+LABEL maintainer="P-O Quirion <po.quirion@mcgill.ca>"
 ENV APP=project_tracking
 
 RUN mkdir /app /sqlite
diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py
index c9ceb47..4edd7b1 100644
--- a/project_tracking/api/project.py
+++ b/project_tracking/api/project.py
@@ -64,7 +64,8 @@ def sanity_check(item, action_output):
 @convcheck_project
 def projects(project_id: str = None):
     """
-    project: uses the form "/project/1" for project ID and "/project/name" for project name
+    GET:
+        project: uses the form "/project/1" for project ID and "/project/name" for project name
     return: list of all the details of the poject with name "project_name" or ID "project_id"
     """
 
@@ -81,8 +82,9 @@ def projects(project_id: str = None):
 @convcheck_project
 def patients(project_id: str, patient_id: str = None):
     """
-    patient_id: uses the form "1,3-8,9"
-    return: list all patient or selected patient that are also par of <project>
+    GET:
+        patient_id: uses the form "1,3-8,9", if not provided all patients are returned
+    return: list all patients or selected patients, belonging to <project>
 
     Query:
     (pair, tumor):  Default (None, true)
@@ -93,7 +95,7 @@ def patients(project_id: str, patient_id: str = None):
             Return: a subset of patient who have Tumor=False & Tumor=True samples
         (false, true):
             return: a subset of patient who only have Tumor=True samples
-        (false, true):
+        (false, false):
             return: a subset of patient who only have Tumor=false samples
     """
 
@@ -144,8 +146,9 @@ def patients(project_id: str, patient_id: str = None):
 @convcheck_project
 def samples(project_id: str, sample_id: str = None):
     """
-    sample_id: uses the form "1,3-8,9", if not provides, all sample are returned
-    return: all or selected sample that are in sample_id and part of project
+    GET:
+        sample_id: uses the form "1,3-8,9", if not provided all samples are returned
+    return: list all samples or selected samples, belonging to <project>
     """
 
     query = request.args
@@ -174,8 +177,9 @@ def samples(project_id: str, sample_id: str = None):
 @convcheck_project
 def readsets(project_id: str, readset_id: str=None):
     """
-    readset_id: uses the form "1,3-8,9", if not provided, all readsets are returned
-    return: selected readsets that are in sample_id and part of project
+    GET:
+        readset_id: uses the form "1,3-8,9", if not provided all readsets are returned
+    return: list all readsets or selected readsets, belonging to <project>
     """
 
     query = request.args
@@ -207,12 +211,12 @@ def readsets(project_id: str, readset_id: str=None):
 @convcheck_project
 def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id: str=None, file_id: str=None):
     """
-    file_id: uses the form "1,3-8,9". Select file by ids
-    patient_id: uses the form "1,3-8,9". Select file by patient ids
-    sample_id: uses the form "1,3-8,9". Select file by sample ids
-    redeaset_id: uses the form "1,3-8,9". Select file by readset ids
-
-    return: selected files
+    GET:
+        file_id: uses the form "1,3-8,9". Select file by ids
+        patient_id: uses the form "1,3-8,9". Select file by patient ids
+        sample_id: uses the form "1,3-8,9". Select file by sample ids
+        readset_id: uses the form "1,3-8,9". Select file by readset ids
+    return: selected files, belonging to <project>
 
     Query:
     (deliverable):  Default (None)
@@ -276,19 +280,19 @@ def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id
 @convcheck_project
 def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_id: str=None, metric_id: str=None):
     """
-    metric_id: uses the form "1,3-8,9". Select metric by ids
-    patient_id: uses the form "1,3-8,9". Select metric by patient ids
-    sample_id: uses the form "1,3-8,9". Select metric by sample ids
-    redeaset_id: uses the form "1,3-8,9". Select metric by readset ids
-
-    We also accespt POST data with comma separeted list
+    GET:
+        metric_id: uses the form "1,3-8,9". Select metric by ids
+        patient_id: uses the form "1,3-8,9". Select metric by patient ids
+        sample_id: uses the form "1,3-8,9". Select metric by sample ids
+        readset_id: uses the form "1,3-8,9". Select metric by readset ids
+    return: selected metrics, belonging to <project>
+
+    We also accept POST data with a comma-separated list
     metric_name = <NAME> [,NAME] [...]
     readset_name = <NAME> [,NAME] [...]
     sample_name = <NAME> [,NAME] [...]
     patient_name = <NAME> [,NAME] [...]
 
-    return: selected metrics
-
     Query:
     (deliverable):  Default (None)
     The deliverable query allows to get all metrics labelled as deliverable
@@ -360,8 +364,9 @@ def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_
 @convcheck_project
 def readsets_from_samples(project_id: str, sample_id: str):
     """
-    sample_id: uses the form "1,3-8,9"
-    return: readsets for selected sample_id
+    GET:
+        sample_id: uses the form "1,3-8,9"
+    return: selected readsets belonging to <sample_id>
     """
 
     query = request.args
@@ -382,14 +387,14 @@ def readsets_from_samples(project_id: str, sample_id: str):
 
     action_output = db_action.readsets(project_id, sample_id)
 
-    return sanity_check("Metric", action_output)
+    return sanity_check("Readset", action_output)
 
 
 @bp.route('/<string:project>/digest_readset_file', methods=['POST'])
 @convcheck_project
 def digest_readset_file(project_id: str):
     """
-    POST: list of Readset/Sample Name or id
+    POST: json holding the list of Patient/Sample/Readset Name or id AND location endpoint + experiment nucleic_acid_type
     return: all information to create a "Genpipes readset file"
     """
 
@@ -410,7 +415,7 @@ def digest_readset_file(project_id: str):
 @convcheck_project
 def digest_pair_file(project_id: str):
     """
-    POST: list of Readset/Sample Name or id
+    POST: json holding the list of Patient/Sample/Readset Name or id AND location endpoint + experiment nucleic_acid_type
     return: all information to create a "Genpipes pair file"
     """
 
@@ -427,7 +432,7 @@ def digest_pair_file(project_id: str):
         return db_action.digest_pair_file(project_id=project_id, digest_data=ingest_data)
 
 
-@bp.route('/<string:project>/ingest_run_processing', methods=['GET', 'POST'])
+@bp.route('/<string:project>/ingest_run_processing', methods=['POST'])
 @convcheck_project
 def ingest_run_processing(project_id: str):
     """
@@ -435,13 +440,6 @@ def ingest_run_processing(project_id: str):
     return: The Operation object
     """
 
-    # Is this if required?
-    if request.method == 'GET':
-        return abort(
-            405,
-            "Use post method to ingest runs"
-            )
-
     if request.method == 'POST':
         try:
             ingest_data = request.get_json(force=True)
@@ -480,21 +478,14 @@ def ingest_transfer(project_id: str):
 
         return [i.flat_dict for i in db_action.ingest_transfer(project_id=project_id, ingest_data=ingest_data)]
 
-@bp.route('/<string:project>/ingest_genpipes', methods=['GET', 'POST'])
+@bp.route('/<string:project>/ingest_genpipes', methods=['POST'])
 @convcheck_project
 def ingest_genpipes(project_id: str):
     """
-    POST: json describing genpipes
+    POST: json describing genpipes analysis
     return: The Operation object and Jobs associated
     """
 
-    # Is this if required?
-    if request.method == 'GET':
-        return abort(
-            405,
-            "Use post method to ingest genpipes analysis"
-            )
-
     if request.method == 'POST':
         try:
             ingest_data = request.get_json(force=True)
@@ -520,8 +511,8 @@ def ingest_genpipes(project_id: str):
 @convcheck_project
 def digest_unanalyzed(project_id: str):
     """
-    POST: list of Readset/Sample Name or id
-    return: Readsets or Samples unanalyzed
+    POST: json holding the list of Sample/Readset Name or id AND location endpoint + experiment nucleic_acid_type
+    return: Samples/Readsets unanalyzed with location endpoint + experiment nucleic_acid_type
     """
     if request.method == 'POST':
         try:
diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py
index 4f3ee18..261494e 100644
--- a/project_tracking/db_action.py
+++ b/project_tracking/db_action.py
@@ -53,13 +53,22 @@ def to_dict(self):
         return rv
 
 class DidNotFindError(Error):
-    """DidNotFind"""
+    """Raised when a lookup finds no match; builds a default message from table/attribute/query."""
     def __init__(self, message=None, table=None, attribute=None, query=None):
         super().__init__(message)
         if message:
             self.message = message
         else:
-            self.message = f"{table} with {attribute} {query} doesn't exist on database"
+            self.message = f"'{table}' with '{attribute}' '{query}' doesn't exist on database"
+
+class RequestError(Error):
+    """Raised for an invalid request; by default reports that `argument` is required."""
+    def __init__(self, message=None, argument=None):
+        super().__init__(message)
+        if message:
+            self.message = message
+        else:
+            self.message = f"For current request '{argument}' is required"
 
 def name_to_id(model_class, name, session=None):
     """
@@ -590,6 +599,7 @@ def ingest_run_processing(project_id: str, ingest_data, session=None):
                 experiment = Experiment.from_attributes(
                     sequencing_technology=readset_json[vb.EXPERIMENT_SEQUENCING_TECHNOLOGY],
                     type=readset_json[vb.EXPERIMENT_TYPE],
+                    nucleic_acid_type=readset_json[vb.EXPERIMENT_NUCLEIC_ACID_TYPE],
                     library_kit=readset_json[vb.EXPERIMENT_LIBRARY_KIT],
                     kit_expiration_date=kit_expiration_date,
                     session=session
@@ -641,10 +651,14 @@ def ingest_run_processing(project_id: str, ingest_data, session=None):
                         metric_deliverable = metric_json[vb.METRIC_DELIVERABLE]
                     else:
                         metric_deliverable = False
+                    if vb.METRIC_FLAG in metric_json:
+                        metric_flag = FlagEnum(metric_json[vb.METRIC_FLAG])
+                    else:
+                        metric_flag = None
                     Metric(
                         name=metric_json[vb.METRIC_NAME],
                         value=metric_json[vb.METRIC_VALUE],
-                        flag=FlagEnum(metric_json[vb.METRIC_FLAG]),
+                        flag=metric_flag,
                         deliverable=metric_deliverable,
                         job=job,
                         readsets=[readset]
@@ -752,6 +766,7 @@ def digest_readset_file(project_id: str, digest_data, session=None):
     if not session:
         session = database.get_session()
 
+    patients = []
     samples = []
     readsets = []
     output = []
@@ -760,49 +775,111 @@ def digest_readset_file(project_id: str, digest_data, session=None):
         }
 
     location_endpoint = None
-
     if vb.LOCATION_ENDPOINT in digest_data.keys():
         location_endpoint = digest_data[vb.LOCATION_ENDPOINT]
 
+    if vb.EXPERIMENT_NUCLEIC_ACID_TYPE in digest_data.keys():
+        nucleic_acid_type = digest_data[vb.EXPERIMENT_NUCLEIC_ACID_TYPE]
+    else:
+        raise RequestError(argument="experiment_nucleic_acid_type")
+
+    if vb.PATIENT_NAME in digest_data.keys():
+        for patient_name in digest_data[vb.PATIENT_NAME]:
+            patient = session.scalars(
+                select(Patient)
+                .where(Patient.name == patient_name)
+                .join(Patient.samples)
+                .join(Sample.readsets)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
+            if patient:
+                patients.append(patient)
+            else:
+                raise DidNotFindError(f"'Patient' with 'name' '{patient_name}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database")
+    if vb.PATIENT_ID in digest_data.keys():
+        for patient_id in digest_data[vb.PATIENT_ID]:
+            # logger.debug(f"\n\n{patient_id}\n\n")
+            patient = session.scalars(
+                select(Patient)
+                .where(Patient.id == patient_id)
+                .join(Patient.samples)
+                .join(Sample.readsets)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
+            if patient:
+                patients.append(patient)
+            else:
+                raise DidNotFindError(f"'Patient' with 'id' '{patient_id}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database")
+    if patients:
+        patients = list(dict.fromkeys(patients))  # order-preserving de-duplication (bare set() discarded its result)
+        for patient in patients:
+            for sample in patient.samples:
+                for readset in sample.readsets:
+                    readsets.append(readset)
+
     if vb.SAMPLE_NAME in digest_data.keys():
         for sample_name in digest_data[vb.SAMPLE_NAME]:
-            sample = session.scalars(select(Sample).where(Sample.name == sample_name)).unique().first()
+            sample = session.scalars(
+                select(Sample)
+                .where(Sample.name == sample_name)
+                .join(Sample.readsets)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
             if sample:
                 samples.append(sample)
             else:
-                raise DidNotFindError(table="Sample", attribute="name", query=sample_name)
+                raise DidNotFindError(f"'Sample' with 'name' '{sample_name}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database")
     if vb.SAMPLE_ID in digest_data.keys():
         for sample_id in digest_data[vb.SAMPLE_ID]:
             # logger.debug(f"\n\n{sample_id}\n\n")
-            sample = session.scalars(select(Sample).where(Sample.id == sample_id)).unique().first()
+            sample = session.scalars(
+                select(Sample)
+                .where(Sample.id == sample_id)
+                .join(Sample.readsets)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
             if sample:
                 samples.append(sample)
             else:
-                raise DidNotFindError(table="Sample", attribute="id", query=sample_id)
+                raise DidNotFindError(f"'Sample' with 'id' '{sample_id}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database")
     if samples:
         set(samples)
         for sample in samples:
             for readset in sample.readsets:
                 readsets.append(readset)
+
     if vb.READSET_NAME in digest_data.keys():
         for readset_name in digest_data[vb.READSET_NAME]:
-            readset = session.scalars(select(Readset).where(Readset.name == readset_name)).unique().first()
+            readset = session.scalars(
+                select(Readset)
+                .where(Readset.name == readset_name)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
             if readset:
                 readsets.append(readset)
             else:
-                raise DidNotFindError(table="Readset", attribute="name", query=readset_name)
+                raise DidNotFindError(f"'Readset' with 'name' '{readset_name}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database")
     if vb.READSET_ID in digest_data.keys():
         for readset_id in digest_data[vb.READSET_ID]:
-            readset = session.scalars(select(Readset).where(Readset.id == readset_id)).unique().first()
+            readset = session.scalars(
+                select(Readset)
+                .where(Readset.id == readset_id)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
             if readset:
                 readsets.append(readset)
             else:
-                raise DidNotFindError(table="Readset", attribute="id", query=readset_id)
+                raise DidNotFindError(f"'Readset' with 'id' '{readset_id}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database")
     if readsets:
         set(readsets)
         for readset in readsets:
             readset_files = []
-            logger.debug(f"\n\n{readset}\n\n")
             bed = None
             fastq1 = None
             fastq2 = None
@@ -866,38 +943,97 @@ def digest_pair_file(project_id: str, digest_data, session=None):
 
     pair_dict = {}
     samples = []
+    patients = []
     # readsets = []
     output = []
 
+    if vb.EXPERIMENT_NUCLEIC_ACID_TYPE in digest_data.keys():
+        nucleic_acid_type = digest_data[vb.EXPERIMENT_NUCLEIC_ACID_TYPE]
+    else:
+        raise RequestError(argument="experiment_nucleic_acid_type")
+
+    if vb.PATIENT_NAME in digest_data.keys():
+        for patient_name in digest_data[vb.PATIENT_NAME]:
+            patient = session.scalars(
+                select(Patient)
+                .where(Patient.name == patient_name)
+                .join(Patient.samples)
+                .join(Sample.readsets)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
+            if patient:
+                patients.append(patient)
+            else:
+                raise DidNotFindError(table="Patient", attribute="name", query=patient_name)
+    if vb.PATIENT_ID in digest_data.keys():
+        for patient_id in digest_data[vb.PATIENT_ID]:
+            patient = session.scalars(
+                select(Patient)
+                .where(Patient.id == patient_id)
+                .join(Patient.samples)
+                .join(Sample.readsets)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
+            if patient:
+                patients.append(patient)
+            else:
+                raise DidNotFindError(table="Patient", attribute="id", query=patient_id)
+    if patients:
+        patients = list(dict.fromkeys(patients))  # order-preserving de-duplication (bare set() discarded its result)
+        for patient in patients:
+            for sample in patient.samples:
+                samples.append(sample)
+
     if vb.SAMPLE_NAME in digest_data.keys():
         for sample_name in digest_data[vb.SAMPLE_NAME]:
-            sample = session.scalars(select(Sample).where(Sample.name == sample_name)).unique().first()
-            # logger.info(f"\n\n{sample}\n\n")
+            sample = session.scalars(
+                select(Sample)
+                .where(Sample.name == sample_name)
+                .join(Sample.readsets)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
             if sample:
                 samples.append(sample)
             else:
                 raise DidNotFindError(table="Sample", attribute="name", query=sample_name)
     if vb.SAMPLE_ID in digest_data.keys():
         for sample_id in digest_data[vb.SAMPLE_ID]:
-            sample = session.scalars(select(Sample).where(Sample.id == sample_id)).unique().first()
+            sample = session.scalars(
+                select(Sample)
+                .where(Sample.id == sample_id)
+                .join(Sample.readsets)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
             if sample:
                 samples.append(sample)
             else:
                 raise DidNotFindError(table="Sample", attribute="id", query=sample_id)
     if vb.READSET_NAME in digest_data.keys():
         for readset_name in digest_data[vb.READSET_NAME]:
-            readset = session.scalars(select(Readset).where(Readset.name == readset_name)).unique().first()
+            readset = session.scalars(
+                select(Readset)
+                .where(Readset.name == readset_name)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
             if readset:
                 samples.append(readset.sample)
-                # readsets.append(readset)
             else:
                 raise DidNotFindError(table="Readset", attribute="name", query=readset_name)
     if vb.READSET_ID in digest_data.keys():
         for readset_id in digest_data[vb.READSET_ID]:
-            readset = session.scalars(select(Readset).where(Readset.id == readset_id)).unique().first()
+            readset = session.scalars(
+                select(Readset)
+                .where(Readset.id == readset_id)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
             if readset:
                 samples.append(readset.sample)
-                # readsets.append(readset)
             else:
                 raise DidNotFindError(table="Readset", attribute="id", query=readset_id)
     if samples:
@@ -977,55 +1113,64 @@ def ingest_genpipes(project_id: str, ingest_data, session=None):
                     job_stop = datetime.strptime(job_json[vb.JOB_STOP], vb.DATE_LONG_FMT)
                 except TypeError:
                     job_stop = None
-                job = Job(
-                    name=job_json[vb.JOB_NAME],
-                    status=StatusEnum(job_json[vb.JOB_STATUS]),
-                    start=job_start,
-                    stop=job_stop,
-                    operation=operation
-                    )
-                for file_json in job_json[vb.FILE]:
-                    suffixes = Path(file_json[vb.FILE_NAME]).suffixes
-                    file_type = os.path.splitext(file_json[vb.FILE_NAME])[-1][1:]
-                    if ".gz" in suffixes:
-                        file_type = "".join(suffixes[-2:])
-                    if vb.FILE_DELIVERABLE in file_json:
-                        file_deliverable = file_json[vb.FILE_DELIVERABLE]
-                    else:
-                        file_deliverable = False
-                    # Need to have an the following otherwise assigning extra_metadata to None converts null into json in the db
-                    if vb.FILE_EXTRA_METADATA in file_json.keys():
-                        file = File(
-                            name=file_json[vb.FILE_NAME],
-                            type=file_type,
-                            extra_metadata=file_json[vb.FILE_EXTRA_METADATA],
-                            deliverable=file_deliverable,
-                            readsets=[readset],
-                            jobs=[job]
-                            )
-                    else:
-                        file = File(
-                            name=file_json[vb.FILE_NAME],
-                            type=file_type,
-                            deliverable=file_deliverable,
-                            readsets=[readset],
-                            jobs=[job]
-                            )
-                    location = Location.from_uri(uri=file_json[vb.LOCATION_URI], file=file, session=session)
-                if vb.METRIC in job_json.keys():
-                    for metric_json in job_json[vb.METRIC]:
-                        if vb.METRIC_DELIVERABLE in metric_json:
-                            metric_deliverable = metric_json[vb.METRIC_DELIVERABLE]
+                # Check if job_status exists otherwise skip it
+                if job_json[vb.JOB_STATUS]:
+                    job = Job(
+                        name=job_json[vb.JOB_NAME],
+                        status=StatusEnum(job_json[vb.JOB_STATUS]),
+                        start=job_start,
+                        stop=job_stop,
+                        operation=operation
+                        )
+                    for file_json in job_json[vb.FILE]:
+                        suffixes = Path(file_json[vb.FILE_NAME]).suffixes
+                        file_type = os.path.splitext(file_json[vb.FILE_NAME])[-1][1:]
+                        if ".gz" in suffixes:
+                            file_type = "".join(suffixes[-2:])
+                        if vb.FILE_DELIVERABLE in file_json:
+                            file_deliverable = file_json[vb.FILE_DELIVERABLE]
                         else:
-                            metric_deliverable = False
-                        Metric(
-                            name=metric_json[vb.METRIC_NAME],
-                            value=metric_json[vb.METRIC_VALUE],
-                            flag=FlagEnum(metric_json[vb.METRIC_FLAG]),
-                            deliverable=metric_deliverable,
-                            job=job,
-                            readsets=[readset]
-                            )
+                            file_deliverable = False
+                        # Both branches are needed: explicitly assigning extra_metadata=None would store null as json in the db
+                        if vb.FILE_EXTRA_METADATA in file_json.keys():
+                            file = File(
+                                name=file_json[vb.FILE_NAME],
+                                type=file_type,
+                                extra_metadata=file_json[vb.FILE_EXTRA_METADATA],
+                                deliverable=file_deliverable,
+                                readsets=[readset],
+                                jobs=[job]
+                                )
+                        else:
+                            file = File(
+                                name=file_json[vb.FILE_NAME],
+                                type=file_type,
+                                deliverable=file_deliverable,
+                                readsets=[readset],
+                                jobs=[job]
+                                )
+                        location = Location.from_uri(uri=file_json[vb.LOCATION_URI], file=file, session=session)
+                    if vb.METRIC in job_json.keys():
+                        for metric_json in job_json[vb.METRIC]:
+                            if vb.METRIC_DELIVERABLE in metric_json:
+                                metric_deliverable = metric_json[vb.METRIC_DELIVERABLE]
+                            else:
+                                metric_deliverable = False
+                            if vb.METRIC_FLAG in metric_json:
+                                metric_flag = FlagEnum(metric_json[vb.METRIC_FLAG])
+                            else:
+                                metric_flag = None
+                            Metric(
+                                name=metric_json[vb.METRIC_NAME],
+                                value=metric_json[vb.METRIC_VALUE],
+                                flag=metric_flag,
+                                deliverable=metric_deliverable,
+                                job=job,
+                                readsets=[readset]
+                                )
+                # A null job status means the job generated no data: skip to the next job so `session.add(job)` below is not reached
+                else:
+                    continue
 
                 session.add(job)
                 session.flush()
@@ -1066,7 +1211,7 @@ def digest_unanalyzed(project_id: str, digest_data, session=None):
     run_name = digest_data["run_name"]
     if run_name:
         run_id = name_to_id("Run", run_name)[0]
-    experiment_sequencing_technology = digest_data["experiment_sequencing_technology"]
+    experiment_nucleic_acid_type = digest_data["experiment_nucleic_acid_type"]
     location_endpoint = digest_data["location_endpoint"]
 
     if sample_name_flag:
@@ -1096,17 +1241,16 @@ def digest_unanalyzed(project_id: str, digest_data, session=None):
             stmt.where(Run.id == run_id)
             .join(Readset.run)
             )
-    if experiment_sequencing_technology:
+    if experiment_nucleic_acid_type:
         stmt = (
-            stmt.where(Experiment.sequencing_technology == experiment_sequencing_technology)
+            stmt.where(Experiment.nucleic_acid_type == experiment_nucleic_acid_type)
             .join(Readset.experiment)
             )
 
-    # logger.debug(f"\n\n{stmt}\n\n")
     output = {
         "location_endpoint": location_endpoint,
+        "experiment_nucleic_acid_type": experiment_nucleic_acid_type,
         key: session.scalars(stmt).unique().all()
     }
-    # logger.debug(f"\n\n{session.scalars(stmt).unique().all()}\n\n")
 
     return json.dumps(output)
diff --git a/project_tracking/model.py b/project_tracking/model.py
index 2b74951..8ad1ee6 100644
--- a/project_tracking/model.py
+++ b/project_tracking/model.py
@@ -35,6 +35,12 @@
 
 from . import database
 
+class NucleicAcidTypeEnum(enum.Enum):
+    """Allowed values (DNA or RNA) for the nucleic_acid_type column of Experiment"""
+    DNA = "DNA"
+    RNA = "RNA"
+
+
 class LaneEnum(enum.Enum):
     """
     lane enum
@@ -43,6 +49,10 @@ class LaneEnum(enum.Enum):
     TWO = "2"
     THREE = "3"
     FOUR = "4"
+    FIVE = "5"
+    SIX = "6"
+    SEVEN = "7"
+    EIGHT = "8"
 
 
 class SequencingTypeEnum(enum.Enum):
@@ -90,6 +100,7 @@ class Base(DeclarativeBase):
     # this is needed for the enum to work properly right now
     # see https://github.com/sqlalchemy/sqlalchemy/discussions/8856
     type_annotation_map = {
+        NucleicAcidTypeEnum: Enum(NucleicAcidTypeEnum),
         LaneEnum: Enum(LaneEnum),
         SequencingTypeEnum: Enum(SequencingTypeEnum),
         StatusEnum: Enum(StatusEnum),
@@ -346,6 +357,7 @@ class Experiment(BaseTable):
         id integer [PK]
         sequencing_technology text
         type text
+        nucleic_acid_type nucleic_acid_type
         library_kit text
         kit_expiration_date text
         deprecated boolean
@@ -358,13 +370,22 @@ class Experiment(BaseTable):
 
     sequencing_technology: Mapped[str] = mapped_column(default=None, nullable=True)
     type: Mapped[str] = mapped_column(default=None, nullable=True)
+    nucleic_acid_type: Mapped[NucleicAcidTypeEnum] = mapped_column(default=None, nullable=False)
     library_kit: Mapped[str] = mapped_column(default=None, nullable=True)
     kit_expiration_date: Mapped[datetime] = mapped_column(default=None, nullable=True)
 
     readsets: Mapped[list["Readset"]] = relationship(back_populates="experiment")
 
     @classmethod
-    def from_attributes(cls, sequencing_technology=None, type=None, library_kit=None, kit_expiration_date=None, session=None):
+    def from_attributes(
+        cls,
+        nucleic_acid_type,
+        sequencing_technology=None,
+        type=None,
+        library_kit=None,
+        kit_expiration_date=None,
+        session=None
+        ):
         """
         get experiment if it exist, set it if it does not exist
         """
@@ -374,6 +395,7 @@ def from_attributes(cls, sequencing_technology=None, type=None, library_kit=None
             select(cls)
                 .where(cls.sequencing_technology == sequencing_technology)
                 .where(cls.type == type)
+                .where(cls.nucleic_acid_type == nucleic_acid_type)
                 .where(cls.library_kit == library_kit)
                 .where(cls.kit_expiration_date == kit_expiration_date)
         ).first()
@@ -381,6 +403,7 @@ def from_attributes(cls, sequencing_technology=None, type=None, library_kit=None
             experiment = cls(
                 sequencing_technology=sequencing_technology,
                 type=type,
+                nucleic_acid_type=nucleic_acid_type,
                 library_kit=library_kit,
                 kit_expiration_date=kit_expiration_date
             )
diff --git a/project_tracking/vocabulary.py b/project_tracking/vocabulary.py
index 90d8cfc..28920d5 100644
--- a/project_tracking/vocabulary.py
+++ b/project_tracking/vocabulary.py
@@ -10,6 +10,7 @@
 
 # patient table
 PATIENT = "patient"
+PATIENT_ID = "patient_id"
 PATIENT_FMS_ID = "patient_fms_id"
 PATIENT_NAME = "patient_name"
 PATIENT_COHORT = "patient_cohort"
@@ -25,6 +26,7 @@
 # experiment table
 EXPERIMENT_SEQUENCING_TECHNOLOGY = "experiment_sequencing_technology"
 EXPERIMENT_TYPE = "experiment_type"
+EXPERIMENT_NUCLEIC_ACID_TYPE = "experiment_nucleic_acid_type"
 EXPERIMENT_LIBRARY_KIT = "experiment_library_kit"
 EXPERIMENT_KIT_EXPIRATION_DATE = "experiment_kit_expiration_date"
 EXPERIMENT_TYPE_LIST = ["PCR-FREE", "RNASEQ"]
diff --git a/tests/conftest.py b/tests/conftest.py
index 5a9d125..99b407a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -28,7 +28,7 @@ def pre_filled_model():
                          project=project)
 
     sequencing_technology = 'Fancy Buzzword'
-    exp = model.Experiment(sequencing_technology=sequencing_technology)
+    exp = model.Experiment(nucleic_acid_type=model.NucleicAcidTypeEnum.DNA)
     pa_name = "P_O"
     pa = model.Patient(name=pa_name, project=project)
     sa_name = 'gros_bobo'
diff --git a/tests/data/run_processing.json b/tests/data/run_processing.json
index 4ff31b8..5e85ebd 100644
--- a/tests/data/run_processing.json
+++ b/tests/data/run_processing.json
@@ -21,6 +21,7 @@
                         {
                             "experiment_sequencing_technology": null,
                             "experiment_type": "PCR-free",
+                            "experiment_nucleic_acid_type": "DNA",
                             "experiment_library_kit": null,
                             "experiment_kit_expiration_date": null,
                             "readset_name": "MoHQ-JG-9-23-15000863775-19933DT.A01433_0157_1",
@@ -73,6 +74,7 @@
                         {
                             "experiment_sequencing_technology": null,
                             "experiment_type": "PCR-free",
+                            "experiment_nucleic_acid_type": "DNA",
                             "experiment_library_kit": null,
                             "experiment_kit_expiration_date": null,
                             "readset_name": "MoHQ-JG-9-23-15000936286-19866DN.A01433_0157_2",
@@ -133,6 +135,7 @@
                         {
                             "experiment_sequencing_technology": null,
                             "experiment_type": "RNASeq",
+                            "experiment_nucleic_acid_type": "RNA",
                             "experiment_library_kit": null,
                             "experiment_kit_expiration_date": null,
                             "readset_name": "MoHQ-CM-1-3-6929-1RT.A01433_0157_3",
@@ -191,6 +194,7 @@
                         {
                             "experiment_sequencing_technology": null,
                             "experiment_type": "PCR-free",
+                            "experiment_nucleic_acid_type": "DNA",
                             "experiment_library_kit": null,
                             "experiment_kit_expiration_date": null,
                             "readset_name": "MoHQ-CM-1-3-15000863775-19933DT.A01433_0157_1",
@@ -243,6 +247,7 @@
                         {
                             "experiment_sequencing_technology": null,
                             "experiment_type": "PCR-free",
+                            "experiment_nucleic_acid_type": "DNA",
                             "experiment_library_kit": null,
                             "experiment_kit_expiration_date": null,
                             "readset_name": "MoHQ-CM-1-3-15000936286-19866DN.A01433_0157_2",
diff --git a/tests/test_serialization.py b/tests/test_serialization.py
index fa75b15..473c8c9 100644
--- a/tests/test_serialization.py
+++ b/tests/test_serialization.py
@@ -8,7 +8,6 @@ def test_serialization(not_app_db):
     op_config_version = 0.1
     op_config_name = 'generic_index'
     op_name = 'ingest'
-    sequencing_technology = 'Fancy Buzzword'
     pa_name = "P_O"
     sa_name = 'gros_bobo'
     ru_name = "cure the Conglomerat old director's partner 01"
@@ -28,7 +27,7 @@ def test_serialization(not_app_db):
                          operation_config=op_c,
                          project=project)
 
-    exp = model.Experiment(sequencing_technology=sequencing_technology)
+    exp = model.Experiment(nucleic_acid_type=model.NucleicAcidTypeEnum.DNA)
     pa = model.Patient(name=pa_name, project=project)
     sa = model.Sample(name=sa_name, patient=pa)
     ru = model.Run(instrument=instrument, name=ru_name)