From 0b04e3909856221ca2bd17acbd0461a4cf942002 Mon Sep 17 00:00:00 2001 From: mgcam Date: Mon, 31 Jul 2023 16:13:15 +0100 Subject: [PATCH 01/29] Added test data for multiple wells. Fixed tests for the code that does is ready for multiple wells. --- .../endpoints/test_single_well_qc_details.py | 149 ------------------ tests/fixtures/well_data.py | 66 +++++++- tests/test_wh_data_retrieval_pb.py | 19 ++- 3 files changed, 77 insertions(+), 157 deletions(-) delete mode 100644 tests/endpoints/test_single_well_qc_details.py diff --git a/tests/endpoints/test_single_well_qc_details.py b/tests/endpoints/test_single_well_qc_details.py deleted file mode 100644 index ee5ae180..00000000 --- a/tests/endpoints/test_single_well_qc_details.py +++ /dev/null @@ -1,149 +0,0 @@ -from fastapi.testclient import TestClient - -from tests.conftest import insert_from_yaml -from tests.fixtures.well_data import load_data4well_retrieval, load_dicts_and_users - - -def test_get_well_info( - test_client: TestClient, mlwhdb_test_session, load_data4well_retrieval -): - - insert_from_yaml( - mlwhdb_test_session, "tests/data/mlwh_pb_run_92", "lang_qc.db.mlwh_schema" - ) - - response = test_client.get("/pacbio/run/MARATHON/well/A0") - assert response.status_code == 404 - assert response.json()["detail"] == "PacBio well A0 run MARATHON not found." - - response = test_client.get("/pacbio/run/TRACTION-RUN-92/well/A1") - assert response.status_code == 200 - result = response.json() - - id_product = "cf18bd66e0f0895ea728c1d08103c62d3de8a57a5f879cee45f7b0acc028aa61" - - assert result["label"] == "A1" - assert result["run_name"] == "TRACTION-RUN-92" - assert result["run_start_time"] == "2022-04-14T12:52:34" - assert result["run_complete_time"] == "2022-04-20T09:16:53" - assert result["well_start_time"] == "2022-04-14T13:02:48" - assert result["well_complete_time"] == "2022-04-16T12:36:21" - assert result["qc_state"] is None - assert result["instrument_name"] == "64222E" - assert result["instrument_type"] == "Sequel2e" - assert result["plate_number"] is None - assert result["id_product"] == id_product - - expected_metrics = { - "smrt_link": { - "run_uuid": "7f5d45ed-aa93-46a6-92b2-4b11d4bf29da", - "dataset_uuid": "7f5d45ed-aa93-46a6-92b2-4b11d4bf29ro", - "hostname": "pacbio01.dnapipelines.sanger.ac.uk", - }, - "binding_kit": {"value": "Sequel II Binding Kit 2.2", "label": "Binding Kit"}, - "control_num_reads": {"value": 24837, "label": "Number of Control Reads"}, - "control_read_length_mean": { - "value": 50169, - "label": "Control Read Length (bp)", - }, - "hifi_read_bases": {"value": 27.08, "label": "CCS Yield (Gb)"}, - "hifi_read_length_mean": {"value": 9411, "label": "CCS Mean Length (bp)"}, - "local_base_rate": {"value": 2.77, "label": "Local Base Rate"}, - "loading_conc": {"value": 80, "label": "Loading concentration (pM)"}, - "p0_num": {"value": 34.94, "label": "P0 %"}, - "p1_num": {"value": 62.81, "label": "P1 %"}, - "p2_num": {"value": 2.25, "label": "P2 %"}, - "polymerase_read_bases": {"value": 645.57, "label": "Total Cell Yield (Gb)"}, - "polymerase_read_length_mean": { - "value": 128878, - "label": "Mean Polymerase Read Length (bp)", - }, - "movie_minutes": {"value": 30, "label": "Run Time (hr)"}, - "percentage_deplexed_reads": { - "value": None, - "label": "Percentage of reads deplexed", - }, - "percentage_deplexed_bases": { - "value": None, - "label": "Percentage of bases deplexed", - }, - } - assert result["metrics"] == expected_metrics - - etrack = result["experiment_tracking"] - assert etrack is not None - assert etrack["num_samples"] == 1 - assert etrack["study_id"] == ["6457"] - assert etrack["study_name"] == "Tree of Life - ASG" - assert etrack["sample_id"] == "7880641" - assert etrack["sample_name"] == "TOL_ASG12404704" - assert etrack["library_type"] == ["PacBio_Ultra_Low_Input"] - assert etrack["tag_sequence"] == [] - - response = test_client.get("/pacbio/run/TRACTION-RUN-92/well/D1") - assert response.status_code == 200 - result = response.json() - - id_product = "c5babd5516f7b9faab8415927e5f300d5152bb96b8b922e768d876469a14fa5d" - - assert result["label"] == "D1" - assert result["run_name"] == "TRACTION-RUN-92" - assert result["instrument_name"] == "64222E" - assert result["instrument_type"] == "Sequel2e" - assert result["plate_number"] is None - assert result["id_product"] == id_product - assert result["metrics"]["smrt_link"]["dataset_uuid"] is None - assert result["qc_state"] is None - - response = test_client.get("/pacbio/run/TRACTION_RUN_1/well/B1") - assert response.status_code == 200 - result = response.json() - - id_product = "b5a7d41453097fe3cc59644a679186e64a2147833ecc76a2870c5fe8068835ae" - - assert result["label"] == "B1" - assert result["run_name"] == "TRACTION_RUN_1" - assert result["instrument_name"] == "64016" - assert result["instrument_type"] == "Sequel2" - assert result["plate_number"] is None - assert result["id_product"] == id_product - assert result["experiment_tracking"] is None - - expected_qc_state = { - "qc_state": "On hold", - "is_preliminary": True, - "qc_type": "sequencing", - "outcome": None, - "id_product": id_product, - "date_created": "2022-12-08T07:15:19", - "date_updated": "2022-12-08T07:15:19", - "user": "zx80@example.com", - "created_by": "LangQC", - } - assert result["qc_state"] == expected_qc_state - - response = test_client.get("/pacbio/run/TRACTION_RUN_2/well/A1") - assert response.status_code == 200 - result = response.json() - - id_product = "bc00984029864176324b91e0871a32a3692a54bd9b18f00b8596a2f2a166eca3" - - assert result["label"] == "A1" - assert result["run_name"] == "TRACTION_RUN_2" - assert result["instrument_name"] == "12345" - assert result["instrument_type"] == "Revio" - assert result["plate_number"] == 1 - assert result["id_product"] == id_product - - expected_qc_state = { - "qc_state": "Failed, Instrument", - "is_preliminary": True, - "qc_type": "sequencing", - "outcome": 0, - "id_product": id_product, - "date_created": "2022-12-07T15:13:56", - "date_updated": "2022-12-07T15:13:56", - "user": "zx80@example.com", - "created_by": "LangQC", - } - assert result["qc_state"] == expected_qc_state diff --git a/tests/fixtures/well_data.py b/tests/fixtures/well_data.py index d52b0418..9e3d2954 100644 --- a/tests/fixtures/well_data.py +++ b/tests/fixtures/well_data.py @@ -203,6 +203,70 @@ "12345", "Revio", 1, + ], + [ + "TRACTION_RUN_2", + "A1", + "2022-12-02 15:11:22", + "2022-12-09 11:26:27", + "2022-12-02 15:21:20", + "2022-12-04 06:43:31", + "Complete", + "Complete", + "OffInstrument", + 3672109, + 1081531, + "12345", + "Revio", + 2, + ], + [ + "TRACTION_RUN_2", + "B1", + "2022-12-02 15:11:22", + "2022-12-09 11:26:27", + "2022-12-03 18:41:10", + "2022-12-05 03:14:01", + "Complete", + "Complete", + "OffInstrument", + 6312394, + 2109216, + "12345", + "Revio", + 2, + ], + [ + "TRACTION_RUN_2", + "C1", + "2022-12-02 15:11:22", + "2022-12-09 11:26:27", + "2022-12-04 21:52:27", + "2022-12-06 11:08:40", + "Complete", + "Complete", + "OffInstrument", + 6342397, + 2106718, + "12345", + "Revio", + 2, + ], + [ + "TRACTION_RUN_2", + "D1", + "2022-12-02 15:11:22", + "2022-12-09 11:26:27", + "2022-12-06 01:20:31", + "2022-12-07 14:13:56", + "Complete", + "Complete", + "OffInstrument", + 6342590, + 2160710, + "12345", + "Revio", + 2, ], [ "TRACTION_RUN_3", @@ -730,7 +794,7 @@ def load_data4well_retrieval( "polymerase_num_reads": record[9], "hifi_num_reads": record[10], "id_pac_bio_product": PacBioEntity( - run_name=record[0], well_label=record[1] + run_name=record[0], well_label=record[1], plate_number=record[13] ).hash_product_id(), "instrument_name": record[11], "instrument_type": record[12], diff --git a/tests/test_wh_data_retrieval_pb.py b/tests/test_wh_data_retrieval_pb.py index 6323098a..1f7c282c 100644 --- a/tests/test_wh_data_retrieval_pb.py +++ b/tests/test_wh_data_retrieval_pb.py @@ -101,18 +101,23 @@ def test_wells_in_runs_retrieval(mlwhdb_test_session, load_data4well_retrieval): assert run_name_set == {"TRACTION_RUN_1"} wells = wm.get_wells_in_runs(["TRACTION_RUN_2", "TRACTION_RUN_1"]) - assert len(wells) == 8 + assert len(wells) == 12 # Test that the rows are correctly sorted run_names = [row.pac_bio_run_name for row in wells] for i in range(0, 4): assert run_names[i] == "TRACTION_RUN_1" - for i in range(4, 8): + for i in range(4, 12): assert run_names[i] == "TRACTION_RUN_2" - well_labels = [row.well_label for row in wells] - expected_labels = ["A1", "B1", "C1", "D1"] - expected_labels.extend(expected_labels) - for i in range(0, 8): - assert well_labels[i] == expected_labels[i] + plates_and_labels = [(row.well_label, row.plate_number) for row in wells] + expected_labels = 3* ["A1", "B1", "C1", "D1"] + expected_plate_numbers = 4 * [None] + 4 * [1] + 4 * [2] + for i in range(0, 12): + assert plates_and_labels[i][0] == expected_labels[i] + if i < 4 : + assert plates_and_labels[i][1] is None + else: + assert plates_and_labels[i][1] == expected_plate_numbers[i] + From 5cc2795199326685d64107584f08d24b7919eea3 Mon Sep 17 00:00:00 2001 From: mgcam Date: Tue, 1 Aug 2023 09:41:18 +0100 Subject: [PATCH 02/29] Replaced the API endpoint for well QC data. The new endpoint /pacbio/products/seq_level?id_product=xxx replaces /pacbio/run/{run_name}/well/{well_name} in order to enable correct retrieval of wells that are identified by run name, well label and plate number. --- lang_qc/endpoints/pacbio_well.py | 30 ++-- lang_qc/endpoints/product.py | 19 +- lang_qc/util/validation.py | 56 ++++++ tests/endpoints/test_dump_qc_states.py | 2 +- .../endpoints/test_single_well_qc_details.py | 167 ++++++++++++++++++ tests/fixtures/well_data.py | 2 +- tests/test_wh_data_retrieval_pb.py | 5 +- 7 files changed, 253 insertions(+), 28 deletions(-) create mode 100644 lang_qc/util/validation.py create mode 100644 tests/endpoints/test_single_well_qc_details.py diff --git a/lang_qc/endpoints/pacbio_well.py b/lang_qc/endpoints/pacbio_well.py index 987fd3be..6b5d8745 100644 --- a/lang_qc/endpoints/pacbio_well.py +++ b/lang_qc/endpoints/pacbio_well.py @@ -20,7 +20,7 @@ # this program. If not, see . from fastapi import APIRouter, Depends, HTTPException -from pydantic import PositiveInt +from pydantic import PositiveInt, ValidationError from sqlalchemy.orm import Session from starlette import status @@ -33,6 +33,7 @@ from lang_qc.models.qc_flow_status import QcFlowStatusEnum from lang_qc.models.qc_state import QcState, QcStateBasic from lang_qc.util.auth import check_user +from lang_qc.util.validation import check_product_id_is_valid router = APIRouter( prefix="/pacbio", @@ -116,29 +117,34 @@ def get_wells_in_run( @router.get( - "/run/{run_name}/well/{well_label}", - summary="Get QC data for a well", + "/products/seq_level", + summary="Get full sequencing QC metrics and state for a product", responses={ - status.HTTP_404_NOT_FOUND: {"description": "Well does not exist"}, + status.HTTP_404_NOT_FOUND: {"description": "Well product does not exist"}, + status.HTTP_422_UNPROCESSABLE_ENTITY: {"description": "Invalid product ID"}, }, response_model=PacBioWellFull, ) -def get_pacbio_well( - run_name: str, - well_label: str, +def get_seq_metrics( + id_product: str, mlwhdb_session: Session = Depends(get_mlwh_db), qcdb_session: Session = Depends(get_qc_db), ) -> PacBioWellFull: - well_row = WellWh(session=mlwhdb_session).get_well( - run_name=run_name, well_label=well_label + try: + check_product_id_is_valid(id_product=id_product) + except ValidationError as err: + raise HTTPException(422, detail=" ".join([e["msg"] for e in err.errors()])) + + mlwh_well = WellWh(session=mlwhdb_session).get_mlwh_well_by_product_id( + id_product=id_product ) - if well_row is None: + if mlwh_well is None: raise HTTPException( - 404, detail=f"PacBio well {well_label} run {run_name} not found." + 404, detail=f"PacBio well for product ID {id_product} not found." ) - return PacBioWellFull.from_orm(well_row, qcdb_session) + return PacBioWellFull.from_orm(mlwh_well, qcdb_session) @router.post( diff --git a/lang_qc/endpoints/product.py b/lang_qc/endpoints/product.py index 606d5541..770e3154 100644 --- a/lang_qc/endpoints/product.py +++ b/lang_qc/endpoints/product.py @@ -18,15 +18,15 @@ # You should have received a copy of the GNU General Public License along with # this program. If not, see . -import re - from fastapi import APIRouter, Depends, HTTPException +from pydantic import ValidationError from sqlalchemy.orm import Session from starlette import status from lang_qc.db.helper.qc import BulkQcFetch from lang_qc.db.qc_connection import get_qc_db from lang_qc.models.qc_state import QcState +from lang_qc.util.validation import check_product_id_list_is_valid router = APIRouter( prefix="/products", @@ -36,8 +36,6 @@ }, ) -CHECKSUM_RE = re.compile("^[a-fA-F0-9]{64}$") - @router.post( "/qc", @@ -56,17 +54,16 @@ https://github.com/wtsi-npg/npg_ml_warehouse/blob/49.0.0/lib/npg_warehouse/loader/pacbio/qc_state.pm """, responses={ - status.HTTP_422_UNPROCESSABLE_ENTITY: {"description": "Invalid checksum"} + status.HTTP_422_UNPROCESSABLE_ENTITY: {"description": "Invalid product ID"} }, response_model=dict[str, list[QcState]], ) def bulk_qc_fetch(request_body: list[str], qcdb_session: Session = Depends(get_qc_db)): # Validate body as checksums, because pydantic validators seem to be buggy # for root types and lose the valid checksums - for sha in request_body: - if not CHECKSUM_RE.fullmatch(sha): - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Checksum must be hexadecimal of length 64", - ) + try: + check_product_id_list_is_valid(id_products=request_body) + except ValidationError as err: + raise HTTPException(422, detail=" ".join([e["msg"] for e in err.errors()])) + return BulkQcFetch(session=qcdb_session).query_by_id_list(request_body) diff --git a/lang_qc/util/validation.py b/lang_qc/util/validation.py new file mode 100644 index 00000000..eb099685 --- /dev/null +++ b/lang_qc/util/validation.py @@ -0,0 +1,56 @@ +# Copyright (c) 2023 Genome Research Ltd. +# +# Author: Marina Gourtovaia +# +# This file is part of npg_langqc. +# +# npg_langqc is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3 of the License, or (at your option) any later +# version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see . + +from typing import Annotated, List + +from pydantic import Field, validate_arguments + +""" +A collection of functions for validating inputs for cases +when a full pydantic class definition feels unnecessary. +""" + +CHECKSUM_RE = "^[a-fA-F0-9]{64}$" + + +@validate_arguments +def check_product_id_is_valid(id_product: Annotated[str, Field(regex=CHECKSUM_RE)]): + """ + Validation for a product ID string. + Product ID is a SHA256 checksum, therefore, the product ID + must be hexadecimal of length 64. + + Returns the argument id_product. + """ + return id_product + + +@validate_arguments +def check_product_id_list_is_valid( + id_products: Annotated[List[str], Field(min_items=1)] +): + """ + Validation for a list of product IDs. + The argument list should have at least one product ID. + An error is raised on the first string that fails validation + for a product ID. + + Returns the argument list. + """ + return [check_product_id_is_valid(id) for id in id_products] diff --git a/tests/endpoints/test_dump_qc_states.py b/tests/endpoints/test_dump_qc_states.py index 08b0630d..2ce874cb 100644 --- a/tests/endpoints/test_dump_qc_states.py +++ b/tests/endpoints/test_dump_qc_states.py @@ -26,7 +26,7 @@ def test_get_qc_by_product_id(test_client: TestClient, load_data4well_retrieval) assert response.ok is False assert response.status_code == 422 message = response.json()["detail"] - assert message == "Checksum must be hexadecimal of length 64" + assert message.startswith("string does not match regex") response = test_client.post("/products/qc", json=[BAD_STRING]) assert response.ok is False diff --git a/tests/endpoints/test_single_well_qc_details.py b/tests/endpoints/test_single_well_qc_details.py new file mode 100644 index 00000000..e2b86652 --- /dev/null +++ b/tests/endpoints/test_single_well_qc_details.py @@ -0,0 +1,167 @@ +from fastapi.testclient import TestClient + +from tests.conftest import insert_from_yaml +from tests.fixtures.well_data import load_data4well_retrieval, load_dicts_and_users + + +def test_get_well_info( + test_client: TestClient, mlwhdb_test_session, load_data4well_retrieval +): + + insert_from_yaml( + mlwhdb_test_session, "tests/data/mlwh_pb_run_92", "lang_qc.db.mlwh_schema" + ) + + id_product = "cf18bd66e0f0895ea728c1d08103c62d3de8a57a5f879cee45f7b0acc028aa67" + response = test_client.get(f"/pacbio/products/seq_level?id_product={id_product}") + assert response.status_code == 404 + assert ( + response.json()["detail"] + == f"PacBio well for product ID {id_product} not found." + ) + + response = test_client.get(f"/pacbio/products/seq_level?id_product=342") + assert response.status_code == 422 + assert response.json()["detail"].startswith("string does not match regex") + + id_product = "cf18bd66e0f0895ea728c1d08103c62d3de8a57a5f879cee45f7b0acc028aa61" + response = test_client.get(f"/pacbio/products/seq_level?id_product={id_product}") + assert response.status_code == 200 + result = response.json() + + assert result["label"] == "A1" + assert result["run_name"] == "TRACTION-RUN-92" + assert result["run_start_time"] == "2022-04-14T12:52:34" + assert result["run_complete_time"] == "2022-04-20T09:16:53" + assert result["well_start_time"] == "2022-04-14T13:02:48" + assert result["well_complete_time"] == "2022-04-16T12:36:21" + assert result["qc_state"] is None + assert result["instrument_name"] == "64222E" + assert result["instrument_type"] == "Sequel2e" + assert result["plate_number"] is None + assert result["id_product"] == id_product + + expected_metrics = { + "smrt_link": { + "run_uuid": "7f5d45ed-aa93-46a6-92b2-4b11d4bf29da", + "dataset_uuid": "7f5d45ed-aa93-46a6-92b2-4b11d4bf29ro", + "hostname": "pacbio01.dnapipelines.sanger.ac.uk", + }, + "binding_kit": {"value": "Sequel II Binding Kit 2.2", "label": "Binding Kit"}, + "control_num_reads": {"value": 24837, "label": "Number of Control Reads"}, + "control_read_length_mean": { + "value": 50169, + "label": "Control Read Length (bp)", + }, + "hifi_read_bases": {"value": 27.08, "label": "CCS Yield (Gb)"}, + "hifi_read_length_mean": {"value": 9411, "label": "CCS Mean Length (bp)"}, + "local_base_rate": {"value": 2.77, "label": "Local Base Rate"}, + "loading_conc": {"value": 80, "label": "Loading concentration (pM)"}, + "p0_num": {"value": 34.94, "label": "P0 %"}, + "p1_num": {"value": 62.81, "label": "P1 %"}, + "p2_num": {"value": 2.25, "label": "P2 %"}, + "polymerase_read_bases": {"value": 645.57, "label": "Total Cell Yield (Gb)"}, + "polymerase_read_length_mean": { + "value": 128878, + "label": "Mean Polymerase Read Length (bp)", + }, + "movie_minutes": {"value": 30, "label": "Run Time (hr)"}, + "percentage_deplexed_reads": { + "value": None, + "label": "Percentage of reads deplexed", + }, + "percentage_deplexed_bases": { + "value": None, + "label": "Percentage of bases deplexed", + }, + } + assert result["metrics"] == expected_metrics + + etrack = result["experiment_tracking"] + assert etrack is not None + assert etrack["num_samples"] == 1 + assert etrack["study_id"] == ["6457"] + assert etrack["study_name"] == "Tree of Life - ASG" + assert etrack["sample_id"] == "7880641" + assert etrack["sample_name"] == "TOL_ASG12404704" + assert etrack["library_type"] == ["PacBio_Ultra_Low_Input"] + assert etrack["tag_sequence"] == [] + + id_product = "c5babd5516f7b9faab8415927e5f300d5152bb96b8b922e768d876469a14fa5d" + response = test_client.get(f"/pacbio/products/seq_level?id_product={id_product}") + assert response.status_code == 200 + result = response.json() + + assert result["label"] == "D1" + assert result["run_name"] == "TRACTION-RUN-92" + assert result["instrument_name"] == "64222E" + assert result["instrument_type"] == "Sequel2e" + assert result["plate_number"] is None + assert result["id_product"] == id_product + assert result["metrics"]["smrt_link"]["dataset_uuid"] is None + assert result["qc_state"] is None + + id_product = "b5a7d41453097fe3cc59644a679186e64a2147833ecc76a2870c5fe8068835ae" + response = test_client.get(f"/pacbio/products/seq_level?id_product={id_product}") + assert response.status_code == 200 + result = response.json() + + assert result["label"] == "B1" + assert result["run_name"] == "TRACTION_RUN_1" + assert result["instrument_name"] == "64016" + assert result["instrument_type"] == "Sequel2" + assert result["plate_number"] is None + assert result["id_product"] == id_product + assert result["experiment_tracking"] is None + + expected_qc_state = { + "qc_state": "On hold", + "is_preliminary": True, + "qc_type": "sequencing", + "outcome": None, + "id_product": id_product, + "date_created": "2022-12-08T07:15:19", + "date_updated": "2022-12-08T07:15:19", + "user": "zx80@example.com", + "created_by": "LangQC", + } + assert result["qc_state"] == expected_qc_state + + id_product = "bc00984029864176324b91e0871a32a3692a54bd9b18f00b8596a2f2a166eca3" + response = test_client.get(f"/pacbio/products/seq_level?id_product={id_product}") + assert response.status_code == 200 + result = response.json() + + assert result["label"] == "A1" + assert result["run_name"] == "TRACTION_RUN_2" + assert result["instrument_name"] == "12345" + assert result["instrument_type"] == "Revio" + assert result["plate_number"] == 1 + assert result["id_product"] == id_product + + expected_qc_state = { + "qc_state": "Failed, Instrument", + "is_preliminary": True, + "qc_type": "sequencing", + "outcome": 0, + "id_product": id_product, + "date_created": "2022-12-07T15:13:56", + "date_updated": "2022-12-07T15:13:56", + "user": "zx80@example.com", + "created_by": "LangQC", + } + assert result["qc_state"] == expected_qc_state + + # The same run and well as above, different plate. + id_product = "3ff15eac7ac39e56d6ee2200b1b9a87a3da405911d79f5a1d250cca3ec697a9a" + response = test_client.get(f"/pacbio/products/seq_level?id_product={id_product}") + assert response.status_code == 200 + result = response.json() + + assert result["label"] == "A1" + assert result["run_name"] == "TRACTION_RUN_2" + assert result["instrument_name"] == "12345" + assert result["instrument_type"] == "Revio" + assert result["plate_number"] == 2 + assert result["id_product"] == id_product + assert result["qc_state"] is None diff --git a/tests/fixtures/well_data.py b/tests/fixtures/well_data.py index 9e3d2954..2df95e0d 100644 --- a/tests/fixtures/well_data.py +++ b/tests/fixtures/well_data.py @@ -204,7 +204,7 @@ "Revio", 1, ], - [ + [ "TRACTION_RUN_2", "A1", "2022-12-02 15:11:22", diff --git a/tests/test_wh_data_retrieval_pb.py b/tests/test_wh_data_retrieval_pb.py index 1f7c282c..2d96bcf5 100644 --- a/tests/test_wh_data_retrieval_pb.py +++ b/tests/test_wh_data_retrieval_pb.py @@ -112,12 +112,11 @@ def test_wells_in_runs_retrieval(mlwhdb_test_session, load_data4well_retrieval): assert run_names[i] == "TRACTION_RUN_2" plates_and_labels = [(row.well_label, row.plate_number) for row in wells] - expected_labels = 3* ["A1", "B1", "C1", "D1"] + expected_labels = 3 * ["A1", "B1", "C1", "D1"] expected_plate_numbers = 4 * [None] + 4 * [1] + 4 * [2] for i in range(0, 12): assert plates_and_labels[i][0] == expected_labels[i] - if i < 4 : + if i < 4: assert plates_and_labels[i][1] is None else: assert plates_and_labels[i][1] == expected_plate_numbers[i] - From 6d0e2df2321f6b737181e0760e9bf47d331c12d5 Mon Sep 17 00:00:00 2001 From: mgcam Date: Tue, 1 Aug 2023 11:48:57 +0100 Subject: [PATCH 03/29] Dropped unnecessary string interpolation --- tests/endpoints/test_single_well_qc_details.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/endpoints/test_single_well_qc_details.py b/tests/endpoints/test_single_well_qc_details.py index e2b86652..73b7f85c 100644 --- a/tests/endpoints/test_single_well_qc_details.py +++ b/tests/endpoints/test_single_well_qc_details.py @@ -20,7 +20,7 @@ def test_get_well_info( == f"PacBio well for product ID {id_product} not found." ) - response = test_client.get(f"/pacbio/products/seq_level?id_product=342") + response = test_client.get("/pacbio/products/seq_level?id_product=342") assert response.status_code == 422 assert response.json()["detail"].startswith("string does not match regex") From 87d6437f88097acf8bee49d08295aa2fe40eacc9 Mon Sep 17 00:00:00 2001 From: mgcam Date: Tue, 1 Aug 2023 13:44:50 +0100 Subject: [PATCH 04/29] Documented the URL schema for PacBio --- lang_qc/endpoints/pacbio_well.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/lang_qc/endpoints/pacbio_well.py b/lang_qc/endpoints/pacbio_well.py index 6b5d8745..b051e047 100644 --- a/lang_qc/endpoints/pacbio_well.py +++ b/lang_qc/endpoints/pacbio_well.py @@ -35,6 +35,36 @@ from lang_qc.util.auth import check_user from lang_qc.util.validation import check_product_id_is_valid +""" +A collection of API endpoints that are specific to the PacBio sequencing +platform. All URLs start with /pacbio/. + +The URLs starting with /pacbio/products are reserved for either retrieving, +or updating, or creating any information about a PacBio product. Sequence +data are out of scope. QC metrics and states and links to any relevant third +party web applications are in scope. + +For the purpose of this API the term 'product' has dual semantics. It refers +to either of the entities listed below: + 1. the target product the user is getting as the output of NPG own and + third party pipelines, + 2. any intermediate product that is used to assess the quality of the end + product, a single well being the prime example of this. + +Each product is characterised by a unique product ID, see +https://github.com/wtsi-npg/npg_id_generation + +A non-indexed single library sequenced in a well has the same product ID as +the well product. Therefore, in order to serve the correct response, it is +necessary to know the context of the request. This can be achieved by +different means: + 1. by adding an extra URL component (see /products/seq_level URL defined + in this package), + 2. by adding an extra parameter to the URL, + 3. for POST requests, by adding and a special field to the payload (see qc_type + in models in lang_qc.models.qc_state). +""" + router = APIRouter( prefix="/pacbio", tags=["pacbio"], From a2fa3cee38fa1c94f4189f0f69b3cb689eddf039 Mon Sep 17 00:00:00 2001 From: mgcam Date: Tue, 1 Aug 2023 15:27:24 +0100 Subject: [PATCH 05/29] Product id is moved from params to the URL itself --- lang_qc/endpoints/pacbio_well.py | 6 +++--- tests/endpoints/test_single_well_qc_details.py | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lang_qc/endpoints/pacbio_well.py b/lang_qc/endpoints/pacbio_well.py index b051e047..acc84c80 100644 --- a/lang_qc/endpoints/pacbio_well.py +++ b/lang_qc/endpoints/pacbio_well.py @@ -58,8 +58,8 @@ the well product. Therefore, in order to serve the correct response, it is necessary to know the context of the request. This can be achieved by different means: - 1. by adding an extra URL component (see /products/seq_level URL defined - in this package), + 1. by adding an extra URL component (see /products/{id_product}/seq_level + URL defined in this package), 2. by adding an extra parameter to the URL, 3. for POST requests, by adding and a special field to the payload (see qc_type in models in lang_qc.models.qc_state). @@ -147,7 +147,7 @@ def get_wells_in_run( @router.get( - "/products/seq_level", + "/products/{id_product}/seq_level", summary="Get full sequencing QC metrics and state for a product", responses={ status.HTTP_404_NOT_FOUND: {"description": "Well product does not exist"}, diff --git a/tests/endpoints/test_single_well_qc_details.py b/tests/endpoints/test_single_well_qc_details.py index 73b7f85c..480b9e60 100644 --- a/tests/endpoints/test_single_well_qc_details.py +++ b/tests/endpoints/test_single_well_qc_details.py @@ -13,19 +13,19 @@ def test_get_well_info( ) id_product = "cf18bd66e0f0895ea728c1d08103c62d3de8a57a5f879cee45f7b0acc028aa67" - response = test_client.get(f"/pacbio/products/seq_level?id_product={id_product}") + response = test_client.get(f"/pacbio/products/{id_product}/seq_level") assert response.status_code == 404 assert ( response.json()["detail"] == f"PacBio well for product ID {id_product} not found." ) - response = test_client.get("/pacbio/products/seq_level?id_product=342") + response = test_client.get("/pacbio/products/342/seq_level") assert response.status_code == 422 assert response.json()["detail"].startswith("string does not match regex") id_product = "cf18bd66e0f0895ea728c1d08103c62d3de8a57a5f879cee45f7b0acc028aa61" - response = test_client.get(f"/pacbio/products/seq_level?id_product={id_product}") + response = test_client.get(f"/pacbio/products/{id_product}/seq_level") assert response.status_code == 200 result = response.json() @@ -88,7 +88,7 @@ def test_get_well_info( assert etrack["tag_sequence"] == [] id_product = "c5babd5516f7b9faab8415927e5f300d5152bb96b8b922e768d876469a14fa5d" - response = test_client.get(f"/pacbio/products/seq_level?id_product={id_product}") + response = test_client.get(f"/pacbio/products/{id_product}/seq_level") assert response.status_code == 200 result = response.json() @@ -102,7 +102,7 @@ def test_get_well_info( assert result["qc_state"] is None id_product = "b5a7d41453097fe3cc59644a679186e64a2147833ecc76a2870c5fe8068835ae" - response = test_client.get(f"/pacbio/products/seq_level?id_product={id_product}") + response = test_client.get(f"/pacbio/products/{id_product}/seq_level") assert response.status_code == 200 result = response.json() @@ -128,7 +128,7 @@ def test_get_well_info( assert result["qc_state"] == expected_qc_state id_product = "bc00984029864176324b91e0871a32a3692a54bd9b18f00b8596a2f2a166eca3" - response = test_client.get(f"/pacbio/products/seq_level?id_product={id_product}") + response = test_client.get(f"/pacbio/products/{id_product}/seq_level") assert response.status_code == 200 result = response.json() @@ -154,7 +154,7 @@ def test_get_well_info( # The same run and well as above, different plate. id_product = "3ff15eac7ac39e56d6ee2200b1b9a87a3da405911d79f5a1d250cca3ec697a9a" - response = test_client.get(f"/pacbio/products/seq_level?id_product={id_product}") + response = test_client.get(f"/pacbio/products/{id_product}/seq_level") assert response.status_code == 200 result = response.json() From b9706f448277b2b2413944d2345257ccdd77e7a3 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Thu, 3 Aug 2023 11:31:16 +0000 Subject: [PATCH 06/29] Change URL control of QcView to use id_product --- frontend/src/components/WellTable.vue | 2 +- .../src/components/__tests__/WellTable.spec.js | 14 ++++++-------- frontend/src/stores/focusWell.js | 4 ++-- frontend/src/utils/__tests__/langqc.spec.js | 4 ++-- frontend/src/utils/__tests__/url.spec.js | 4 ++-- frontend/src/utils/langqc.js | 4 ++-- frontend/src/utils/url.js | 16 ++++++++++------ frontend/src/views/WellsByRun.vue | 8 ++++---- frontend/src/views/WellsByStatus.vue | 11 +++++------ 9 files changed, 34 insertions(+), 33 deletions(-) diff --git a/frontend/src/components/WellTable.vue b/frontend/src/components/WellTable.vue index 5418cc47..54e36e14 100644 --- a/frontend/src/components/WellTable.vue +++ b/frontend/src/components/WellTable.vue @@ -27,7 +27,7 @@ defineEmits(['wellSelected']) {{ wellObj.run_name }} - {{ wellObj.instrument_type }} {{ wellObj.instrument_name }} diff --git a/frontend/src/components/__tests__/WellTable.spec.js b/frontend/src/components/__tests__/WellTable.spec.js index d486b0bd..8d795d86 100644 --- a/frontend/src/components/__tests__/WellTable.spec.js +++ b/frontend/src/components/__tests__/WellTable.spec.js @@ -7,8 +7,8 @@ describe('Rows of data give rows in the table', () => { const table = mount(WellTable, { props: { wellCollection: [ - {run_name: 'TEST1', label: 'A1', instrument_name: '1234', instrument_type: 'Revio'}, - {run_name: 'TEST1', label: 'B1', instrument_name: '1234', instrument_type: 'Revio'}, + {run_name: 'TEST1', label: 'A1', instrument_name: '1234', instrument_type: 'Revio', id_product: 'ABCDEF'}, + {run_name: 'TEST1', label: 'B1', instrument_name: '1234', instrument_type: 'Revio', id_product: '123456'}, ] } }) @@ -29,13 +29,11 @@ describe('Rows of data give rows in the table', () => { test('Clicking triggers event emits containing required data', async () => { const rows = table.findAll('tr') await rows[1].find('button').trigger('click') - expect(table.emitted().wellSelected[0][0]).toHaveProperty('qcLabel') - expect(table.emitted().wellSelected[0][0]).toHaveProperty('qcRun') - expect(table.emitted().wellSelected[0][0].qcRun).toEqual('TEST1') - expect(table.emitted().wellSelected[0][0].qcLabel).toEqual('A1') + + expect(table.emitted().wellSelected[0][0]).toHaveProperty('idProduct') + expect(table.emitted().wellSelected[0][0].idProduct).toEqual('ABCDEF') await rows[2].find('button').trigger('click') - expect(table.emitted().wellSelected[1][0].qcRun).toEqual('TEST1') - expect(table.emitted().wellSelected[1][0].qcLabel).toEqual('B1') + expect(table.emitted().wellSelected[1][0].idProduct).toEqual('123456') }) }) \ No newline at end of file diff --git a/frontend/src/stores/focusWell.js b/frontend/src/stores/focusWell.js index 31b8703c..ef4a030d 100644 --- a/frontend/src/stores/focusWell.js +++ b/frontend/src/stores/focusWell.js @@ -71,8 +71,8 @@ export const useWellStore = defineStore('focusWell', { setFocusWell(well) { this.well = well; }, - loadWellDetail(runName, label) { - apiClient.getWellPromise(runName, label).then( + loadWellDetail(id_product) { + apiClient.getWellPromise(id_product).then( (well) => this.well = well ).catch( (error) => { diff --git a/frontend/src/utils/__tests__/langqc.spec.js b/frontend/src/utils/__tests__/langqc.spec.js index 0bd114bc..0fe84544 100644 --- a/frontend/src/utils/__tests__/langqc.spec.js +++ b/frontend/src/utils/__tests__/langqc.spec.js @@ -90,8 +90,8 @@ describe('Example fake remote api call', () => { client.getClientConfig(); expect(fetch.mock.calls[4][0]).toEqual('/api/config'); - client.getWellPromise('blah', 'A2'); - expect(fetch.mock.calls[5][0]).toEqual('/api/pacbio/run/blah/well/A2'); + client.getWellPromise('A12345'); + expect(fetch.mock.calls[5][0]).toEqual('/api/pacbio/products/A12345/seq_level'); client.getWellsForRunPromise('blah') expect(fetch.mock.calls[6][0]).toEqual('/api/pacbio/run/blah?page_size=100&page=1') diff --git a/frontend/src/utils/__tests__/url.spec.js b/frontend/src/utils/__tests__/url.spec.js index 643d2626..f8194ab5 100644 --- a/frontend/src/utils/__tests__/url.spec.js +++ b/frontend/src/utils/__tests__/url.spec.js @@ -63,8 +63,8 @@ describe('Test query setting in URL', () => { }) describe('Test QC param check', () => { - const testRun = {qcRun: 'test-run', qcLabel: 'A1'} - const alteredTestRun = {qcRun: 'test-run', qcLabel: 'B1'} + const testRun = {idProduct: 'AAAAAAAA'} + const alteredTestRun = {idProduct: 'BBBBBBBB'} test('No before, some after', () => { expect(qcQueryChanged(undefined, testRun)).toBe(true) diff --git a/frontend/src/utils/langqc.js b/frontend/src/utils/langqc.js index 79e757c0..590d12fc 100644 --- a/frontend/src/utils/langqc.js +++ b/frontend/src/utils/langqc.js @@ -84,8 +84,8 @@ export default class LangQc { ); } - getWellPromise(name, well) { - return this.fetchWrapper(this.buildUrl(['run', name, 'well', well])); + getWellPromise(id_product) { + return this.fetchWrapper(this.buildUrl(['products', id_product, 'seq_level'])); } getWellsForRunPromise(name) { diff --git a/frontend/src/utils/url.js b/frontend/src/utils/url.js index 215e2f96..aa000d0f 100644 --- a/frontend/src/utils/url.js +++ b/frontend/src/utils/url.js @@ -37,22 +37,26 @@ export function generateUrl(existingSettings, newSettings, path) { } export function qcQueryChanged(before, after) { - // Compares ?qcLabel and ?qcRun in the URL to tell when new data will be needed - // Invoke while watching vue-router.route.query + // Compares ?idProduct in the URL to tell when new data will + // be needed. Invoke while watching vue-router.route.query if ( - (after.qcLabel || after.qcRun) + (after.idProduct) && ( + // New page load before === undefined || ( - !before['qcLabel'] && !before['qcRun'] + // Focus removed in previous nav, e.g. go home + !before['idProduct'] ) || ( - before.qcLabel && before.qcRun && (after.qcLabel != before.qcLabel || after.qcRun != before.qcRun) + // Change of focus within page + before.idProduct + && (after.idProduct != before.idProduct) ) ) ) { return true - } else if (before !== undefined && (before['qcLabel'] || before['qcRun'])) { + } else if (before !== undefined && (before['idProduct'])) { // In case all arguments are removed return true } diff --git a/frontend/src/views/WellsByRun.vue b/frontend/src/views/WellsByRun.vue index d8a0f526..3ff472e4 100644 --- a/frontend/src/views/WellsByRun.vue +++ b/frontend/src/views/WellsByRun.vue @@ -36,8 +36,8 @@ function flatten_data(values) { getUserName((email) => { user.value = email }).then(); watch(() => route.query, (after, before) => { - if ((after['qcLabel'] || after['qcRun']) && qcQueryChanged(before, after)) { - focusWell.loadWellDetail(after.qcRun, after.qcLabel) + if ((after['idProduct']) && qcQueryChanged(before, after)) { + focusWell.loadWellDetail(after.idProduct) } }, { immediate: true } @@ -73,8 +73,8 @@ function updateUrlQuery(newParams) { } } -function wellSelected(well) { - updateUrlQuery({qcRun: well.qcRun, qcLabel: well.qcLabel}) +function wellSelected(idProduct) { + updateUrlQuery({idProduct: idProduct}) } diff --git a/frontend/src/views/WellsByStatus.vue b/frontend/src/views/WellsByStatus.vue index e099659d..6475600d 100644 --- a/frontend/src/views/WellsByStatus.vue +++ b/frontend/src/views/WellsByStatus.vue @@ -60,9 +60,9 @@ watch(() => route.query, (after, before) => { } // Handle the run and well to show in the QC Viewer - if (after && (after.qcLabel || after.qcRun) && qcQueryChanged(before, after)) { + if (after && (after.idProduct) && qcQueryChanged(before, after)) { // Somehow we need to capture the other parameter in case both have not been set - loadWellDetail(after.qcRun, after.qcLabel) + loadWellDetail(after.idProduct) } // Handle changes of tab in the table of wells. @@ -86,12 +86,11 @@ watch(() => route.query, (after, before) => { provide('activeTab', activeTab); provide('activeWell', readonly(activeWell)); -function loadWellDetail(runName, label) { +function loadWellDetail(id_product) { // Sets the well and QC state for the QcView components below - focusWell.loadWellDetail(runName, label) - activeWell.runName = runName; - activeWell.label = label; + focusWell.loadWellDetail(id_product); + [activeWell.runName, activeWell.label] = focusWell.getRunAndLabel; } function loadWells(status, page, pageSize) { From 6f707eb41f23cab832e20d5db6c9bf7c02a39704 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Thu, 3 Aug 2023 13:34:38 +0000 Subject: [PATCH 07/29] Bugfix for WellsByRun --- frontend/src/views/WellsByRun.vue | 6 +----- frontend/src/views/__tests__/WellsByRun.spec.js | 2 ++ 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/frontend/src/views/WellsByRun.vue b/frontend/src/views/WellsByRun.vue index 3ff472e4..e177ff5c 100644 --- a/frontend/src/views/WellsByRun.vue +++ b/frontend/src/views/WellsByRun.vue @@ -73,15 +73,11 @@ function updateUrlQuery(newParams) { } } -function wellSelected(idProduct) { - updateUrlQuery({idProduct: idProduct}) -} -