Skip to content

Commit

Permalink
Merge pull request #188 from wtsi-npg/devel
Browse files Browse the repository at this point in the history
Release 1.5.0 to master
  • Loading branch information
nerdstrike authored Oct 18, 2023
2 parents 4cdbb76 + 6585cad commit 174ca71
Show file tree
Hide file tree
Showing 27 changed files with 882 additions and 687 deletions.
16 changes: 15 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,21 @@
The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).

## [Unreleased]
## [1.5.0]

### Added

* Back-end code for the 'Upcoming' tab. The 'Upcoming' tab is
automatically appended to the collection of the UI tabs for
filtering wells.

### Changed

* Increased the look-back period for the inbox query from 4 weeks to
12 weeks. Introduced preliminary filtering by the QC state, which is
now available in the ml warehouse. Since the ml warehouse QC state might
not be up-to-date, a final check against the LangQC database is retained.
* Major upgrade of FastAPI, Pydantic and related dependencies.

## [1.4.1] - 2023-08-23

Expand Down
2 changes: 1 addition & 1 deletion frontend/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "npg-longue-vue",
"version": "1.4.1",
"version": "1.5.0",
"description": "UI for LangQC",
"author": "Kieron Taylor <kt19@sanger.ac.uk>",
"license": "GPL-3.0-or-later",
Expand Down
2 changes: 1 addition & 1 deletion lang_qc/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.4.1"
__version__ = "1.5.0"
2 changes: 1 addition & 1 deletion lang_qc/db/helper/well.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def _create_well(
).scalar_one()
product_json = PacBioEntity(
run_name=run_name, well_label=well_label, plate_number=plate_number
).json()
).model_dump_json()

# TODO: in future for composite products we have to check whether any of
# the `sub_product` table entries we are linking to already exist.
Expand Down
101 changes: 80 additions & 21 deletions lang_qc/db/helper/wells.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@
from datetime import date, datetime, timedelta
from typing import ClassVar, List

from pydantic import BaseModel, Extra, Field
from pydantic import BaseModel, ConfigDict, Field
from sqlalchemy import and_, or_, select
from sqlalchemy.orm import Session

from lang_qc.db.helper.qc import get_qc_states_by_id_product_list
from lang_qc.db.helper.qc import (
get_qc_states_by_id_product_list,
qc_state_for_product_exists,
)
from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
from lang_qc.db.qc_schema import QcState, QcStateDict, QcType
from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWell
Expand All @@ -42,6 +45,8 @@
Here this type is used to mark variables that are purely internal to the class.
"""

# Length, in weeks, of the look-back period used when selecting recent wells.
INBOX_LOOK_BACK_NUM_WEEKS = 12


class WellWh(BaseModel):
"""
Expand All @@ -54,12 +59,9 @@ class WellWh(BaseModel):
title="SQLAlchemy Session",
description="A SQLAlchemy Session for the ml warehouse database",
)
INBOX_LOOK_BACK_NUM_WEEKS: ClassVar = 4

class Config:
allow_mutation = False
arbitrary_types_allowed = True
allow_population_by_field_name = True
model_config = ConfigDict(arbitrary_types_allowed=True, populate_by_name=True)
# frozen=True from Pydantic v2 does not work the way we want it to during testing.
# The TestClient seems to be keeping these instances alive and changing them.

def get_mlwh_well_by_product_id(
self, id_product: str
Expand All @@ -77,31 +79,35 @@ def get_mlwh_well_by_product_id(

def recent_completed_wells(self) -> List[PacBioRunWellMetrics]:
"""
Get recent completed wells from the mlwh database.
The implementation of the inbox query might change when the QC outcomes
become available in mlwh.
Get from the mlwh database recent completed wells that have not been QC-ed.
Recent wells are defined as wells that completed within the
last 12 weeks.
"""

######
# It is important not to show aborted wells in the inbox.
#
# The well can be complete as in Illumina 'run complete' but that's not
# the same as analysis complete which the other conditions are trying for.
# It potentially gets a bit easier with v11 but those conditions should
# still work ok.
# The well can be complete, but that's not the same as analysis
# complete which the other conditions are trying for.
# It potentially gets a bit easier with v11 but those conditions
# should still work ok.
#

# Using current local time.
# Generating a date rather than a timestamp here in order to have a consistent
# earliest date for the look-back period during the QC team's working day.
my_date = date.today() - timedelta(weeks=self.INBOX_LOOK_BACK_NUM_WEEKS)
my_date = date.today() - timedelta(weeks=INBOX_LOOK_BACK_NUM_WEEKS)
look_back_min_date = datetime(my_date.year, my_date.month, my_date.day)

# Select the wells that have not been QC-ed, but later double-check against
# the LangQC database.

# TODO: fall back to run_complete when well_complete is undefined

query = (
select(PacBioRunWellMetrics)
.where(PacBioRunWellMetrics.well_status == "Complete")
.where(PacBioRunWellMetrics.qc_seq_state.is_(None))
.where(PacBioRunWellMetrics.run_complete > look_back_min_date)
.where(PacBioRunWellMetrics.polymerase_num_reads.is_not(None))
.where(
Expand Down Expand Up @@ -179,10 +185,7 @@ class PacBioPagedWellsFactory(WellWh, PagedResponse):
),
}

class Config:
arbitrary_types_allowed = True
extra = Extra.forbid
allow_mutation = True
model_config = ConfigDict(arbitrary_types_allowed=True, extra="forbid")

def create_for_qc_status(
self, qc_flow_status: QcFlowStatusEnum
Expand Down Expand Up @@ -211,6 +214,8 @@ def create_for_qc_status(
QcFlowStatusEnum.UNKNOWN,
]:
wells = self._aborted_and_unknown_wells(qc_flow_status)
elif qc_flow_status == QcFlowStatusEnum.UPCOMING:
wells = self._upcoming_wells()
else:
wells = self._get_wells_for_status(qc_flow_status)

Expand Down Expand Up @@ -320,6 +325,60 @@ def _add_tracking_info(self, wells: List[PacBioWell]):
else:
well.copy_run_tracking_info(db_well)

def _upcoming_wells(self):
    """
    Build the page of well models for the 'Upcoming' QC flow status.

    A well is considered upcoming when all of the following hold:
      - its run started within the look-back period, i.e. within the
        last INBOX_LOOK_BACK_NUM_WEEKS weeks counted from today's date;
      - it has no sequencing QC state in the ml warehouse;
      - it is not returned by `recent_completed_wells`;
      - its status does not mark it as aborted, terminated, failed,
        errored, unknown or on hold;
      - no QC state for it is found in the LangQC database.

    The total number of matching wells is saved in
    `total_number_of_items` before the data is sliced for the page.
    """

    completed_ids = [
        well.id_pac_bio_product for well in self.recent_completed_wells()
    ]

    # A date rather than a timestamp, so the cut-off is midnight of the
    # earliest day of the look-back period.
    earliest_day = date.today() - timedelta(weeks=INBOX_LOOK_BACK_NUM_WEEKS)
    earliest_run_start = datetime.combine(earliest_day, datetime.min.time())

    # If queries for any other filters change, this query should be revised
    # since we are repeating (but negating) a few conditions that are
    # associated with some of the statuses (filters).
    well_status = PacBioRunWellMetrics.well_status
    query = (
        select(PacBioRunWellMetrics)
        .where(PacBioRunWellMetrics.run_start > earliest_run_start)
        .where(PacBioRunWellMetrics.qc_seq_state.is_(None))
        .where(PacBioRunWellMetrics.id_pac_bio_product.not_in(completed_ids))
    )
    # Exclude every status that belongs to one of the other filters.
    for status_pattern in ("Abort%", "Terminat%", "Fail%", "Error%"):
        query = query.where(well_status.not_like(status_pattern))
    query = query.where(well_status.not_in(["Unknown", "On hold"])).order_by(
        PacBioRunWellMetrics.run_start,
        PacBioRunWellMetrics.pac_bio_run_name,
        PacBioRunWellMetrics.plate_number,
        PacBioRunWellMetrics.well_label,
    )

    candidates = self.session.execute(query).scalars().all()

    # The QC state in the ml warehouse is only a preliminary filter, so
    # each candidate is checked against the LangQC database as well.
    upcoming = [
        candidate
        for candidate in candidates
        if qc_state_for_product_exists(
            session=self.qcdb_session, id_product=candidate.id_pac_bio_product
        )
        is False
    ]

    # Record the full count before slicing out the requested page.
    self.total_number_of_items = len(upcoming)

    return self._well_models(self.slice_data(upcoming), False)

def _recent_inbox_wells(self, recent_wells):

inbox_wells_indexes = []
Expand Down Expand Up @@ -397,7 +456,7 @@ def _well_models(
# A well can have only one or zero current sequencing outcomes.
if qced_products is not None:
attrs["qc_state"] = qced_products[0]
pb_well = PacBioWell.parse_obj(attrs)
pb_well = PacBioWell.model_validate(attrs)
pb_well.copy_run_tracking_info(db_well)
pb_wells.append(pb_well)

Expand Down
19 changes: 12 additions & 7 deletions lang_qc/endpoints/pacbio_well.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Authors:
# Adam Blanchet
# Marina Gourtovaia <mg8@sanger.ac.uk>
# Kieron Taylor <kt19@sanger.ac.uk>
#
# This file is part of npg_langqc.
#
Expand All @@ -19,8 +20,9 @@
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

from fastapi import APIRouter, Depends, HTTPException
from pydantic import PositiveInt
from typing import Annotated

from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.orm import Session
from starlette import status

Expand Down Expand Up @@ -75,7 +77,6 @@
in models in lang_qc.models.qc_state).
"""


router = APIRouter(
prefix="/pacbio",
tags=["pacbio"],
Expand All @@ -84,6 +85,10 @@
},
)

OptionalPositiveInt = Annotated[int | None, Query(gt=0)]
# We cannot get this from pydantic as of v2, so we use Python 3.9 `Annotated` type support
# and FastAPI query constraints on URL query parameters.


@router.get(
"/wells",
Expand All @@ -104,8 +109,8 @@
response_model=PacBioPagedWells,
)
def get_wells_filtered_by_status(
page_size: PositiveInt,
page_number: PositiveInt,
page_size: OptionalPositiveInt,
page_number: OptionalPositiveInt,
qc_status: QcFlowStatusEnum = QcFlowStatusEnum.INBOX,
qcdb_session: Session = Depends(get_qc_db),
mlwh_session: Session = Depends(get_mlwh_db),
Expand Down Expand Up @@ -138,8 +143,8 @@ def get_wells_filtered_by_status(
)
def get_wells_in_run(
run_name: str,
page_size: PositiveInt = 20,
page_number: PositiveInt = 1,
page_size: OptionalPositiveInt = 20,
page_number: OptionalPositiveInt = 1,
qcdb_session: Session = Depends(get_qc_db),
mlwh_session: Session = Depends(get_mlwh_db),
):
Expand Down
2 changes: 1 addition & 1 deletion lang_qc/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
from pydantic import BaseSettings
from pydantic_settings import BaseSettings

from lang_qc.endpoints import config, pacbio_well, product

Expand Down
10 changes: 4 additions & 6 deletions lang_qc/models/pacbio/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#
# Authors:
# Marina Gourtovaia <mg8@sanger.ac.uk>
# Kieron Taylor <kt19@sanger.ac.uk>
#
# This file is part of npg_langqc.
#
Expand All @@ -20,7 +21,7 @@

from typing import List

from pydantic import BaseModel, Extra, Field
from pydantic import BaseModel, ConfigDict, Field

from lang_qc.db.mlwh_schema import PacBioRun

Expand Down Expand Up @@ -100,10 +101,7 @@ class PacBioExperiment(BaseModel):
The pac_bio_library_tube_barcode from TRACTION, AKA pool name
""",
)

class Config:
orm_mode = True
extra = Extra.forbid
model_config = ConfigDict(from_attributes=True, extra="forbid")

@classmethod
def from_orm(cls, lims_db_rows: List[PacBioRun]):
Expand Down Expand Up @@ -151,4 +149,4 @@ def from_orm(cls, lims_db_rows: List[PacBioRun]):
for key in ("library_type", "study_id"):
lims_data[key] = sorted(lims_data[key])

return cls.parse_obj(lims_data)
return cls.model_validate(lims_data)
15 changes: 8 additions & 7 deletions lang_qc/models/pacbio/qc_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Copyright (c) 2022, 2023 Genome Research Ltd.
#
# Author: Marina Gourtovaia <mg8@sanger.ac.uk>
# Authors:
# Marina Gourtovaia <mg8@sanger.ac.uk>
# Kieron Taylor <kt19@sanger.ac.uk>
#
#
# This file is part of npg_langqc.
#
Expand All @@ -17,7 +20,7 @@
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>

from pydantic import BaseModel, Field
from pydantic import BaseModel, ConfigDict, Field

from lang_qc.db.mlwh_schema import PacBioRunWellMetrics

Expand Down Expand Up @@ -122,16 +125,14 @@ class QCDataWell(BaseModel):
percentage_deplexed_bases: dict = Field(
default=None, title="Percentage of bases deplexed"
)

class Config:
orm_mode = True
model_config = ConfigDict(from_attributes=True)

@classmethod
def from_orm(cls, obj: PacBioRunWellMetrics):

# Introspect the class definition, get a dictionary of specs
# for properties with property names as the keys.
attrs = cls.schema()["properties"]
attrs = cls.model_json_schema()["properties"]
qc_data = {}

for name in attrs:
Expand All @@ -151,4 +152,4 @@ def from_orm(cls, obj: PacBioRunWellMetrics):
else:
qc_data[name]["value"] = getattr(obj, name, None)

return cls.parse_obj(qc_data)
return cls.model_validate(qc_data)
Loading

0 comments on commit 174ca71

Please sign in to comment.