From 781347a23c521b80d695292f23ce195ee604b61b Mon Sep 17 00:00:00 2001 From: mwalzer Date: Tue, 23 Jul 2024 15:11:38 +0100 Subject: [PATCH 1/7] updated QC2 sample examples, file validation fixes, explanation corrections --- .../QC2-sample-example.mzQC.md | 42 +++++++------- .../examples/QC2-sample-example.mzQC | 56 +++++++++++++++---- 2 files changed, 66 insertions(+), 32 deletions(-) diff --git a/docs/pages/worked-examples/QC2-sample-example.mzQC.md b/docs/pages/worked-examples/QC2-sample-example.mzQC.md index afae5f1..72c6099 100644 --- a/docs/pages/worked-examples/QC2-sample-example.mzQC.md +++ b/docs/pages/worked-examples/QC2-sample-example.mzQC.md @@ -10,28 +10,30 @@ Find the complete file at the bottom of this document or in the example folder. The mzQC file is made from the acquision of a QC2 sample as described in [QCloud: A cloud-based quality control system for mass spectrometry-based proteomics laboratories](https://doi.org/10.1371/journal.pone.0189209). Optional (detailed) descriptions about the file can be placed into mzQC next to the general information about the file. ``` - "description": "This is an example of an mzQC file produced from a proteomics QC2 sample. 20 ug dried Pierce HeLa protein digest standard from Thermo Fisher Scientific (Part number: 88329) are dissolved in 200 uL of 0.1% formic acid in water to a final concentration of 100 ng/uL. A total amount of 1 uL (100ng) is injected per analysis.", +"description": "This is an example of an mzQC file produced from a proteomics QC2 sample. 20 ug dried Pierce HeLa protein digest standard from Thermo Fisher Scientific (Part number: 88329) are dissolved in 200 uL of 0.1% formic acid in water to a final concentration of 100 ng/uL. A total amount of 1 uL (100ng) is injected per analysis.", ``` -The metrics describe simple values like the cornerstone numbers of the acquisition and identification process, but also information specific to the QC method employed. 
Here, it is the mass accuracies and MS1 feature areas of selected peptides. With regular runs like this, the instrument's performance can be reliably monitored and maintenance interventions scheduled before valuable samples are wasted on an instrument running at sub-par performance. +The metrics describe simple values like the cornerstone numbers of the acquisition and identification processes, but also information specific to the QC method employed. Here, it is the mass accuracies and MS1 feature areas of selected peptides. With regularly measured runs from samples like this, the instrument's performance can be reliably monitored and maintenance interventions scheduled before valuable samples are wasted on an instrument running at sub-par performance. ``` - { - "accession":"MS:4000078", - "name":"QC2 sample mass accuracies", - "value":{ - "MS:1003169":["YAEAVTR","STLTDSLVC(Carbamidomethyl)K","SLADELALVDVLEDK","NPDDITNEEYGEFYK","LAVDEEENADNNTK","FEELNMDLFR","EAALSTALSEK","DDVAQTDLLQIDPNFGSK","RFPGYDSESK","EATTEFSVDAR","EQFLDGDGWTSR"], - "MS:4000072":[-0.2346854518740762,-0.08024023890884578,-0.1322012562867409,-0.2259441806378488,-0.10596535779273217,0.28345130855013684,-0.08600783742175504,-0.3683484942567654,-0.03348194493295555,-0.41789282666789496,-0.12794363836212685] - } - }, - { - "accession":"MS:4000079", - "name":"QC2 sample intensities", - "value":{ - "MS:1003169":["YAEAVTR","STLTDSLVC(Carbamidomethyl)K","SLADELALVDVLEDK","NPDDITNEEYGEFYK","LAVDEEENADNNTK","FEELNMDLFR","EAALSTALSEK","DDVAQTDLLQIDPNFGSK","RFPGYDSESK","EATTEFSVDAR","EQFLDGDGWTSR"], - "MS:1001844":[1234940000.0,922790000.0,80819100.0,478714000.0,254935000.0,52841200.0,243597000.0,24581800.0,707504000.0,129063000.0,205583000.0] - } - } + { + "accession":"MS:4000078", + "name":"QC2 sample mass accuracies", + "description": "Observed mass accuracy for the peptides of a QC2 sample measurement. 
The table should contain the peptides as described in the QC2 sample metric term, missing are interpreted as not detected.", + "value":{ + "MS:1003169":["YAEAVTR","STLTDSLVC(Carbamidomethyl)K","SLADELALVDVLEDK","NPDDITNEEYGEFYK","LAVDEEENADNNTK","FEELNMDLFR","EAALSTALSEK","DDVAQTDLLQIDPNFGSK","RFPGYDSESK","EATTEFSVDAR","EQFLDGDGWTSR"], + "MS:4000072":[-0.2346854518740762,-0.08024023890884578,-0.1322012562867409,-0.2259441806378488,-0.10596535779273217,0.28345130855013684,-0.08600783742175504,-0.3683484942567654,-0.03348194493295555,-0.41789282666789496,-0.12794363836212685] + } + }, + { + "accession":"MS:4000079", + "name":"QC2 sample intensities", + "description": "Observed intensities for the peptides of a QC2 sample measurement within 5 ppm and +/- 240 s RT tolerance. Different metrics of observed intensities are possible, at least one must be present. The table should contain the peptides as defined in the parent QC2 sample metric term, missing are interpreted as not detected.", + "value":{ + "MS:1003169":["YAEAVTR","STLTDSLVC(Carbamidomethyl)K","SLADELALVDVLEDK","NPDDITNEEYGEFYK","LAVDEEENADNNTK","FEELNMDLFR","EAALSTALSEK","DDVAQTDLLQIDPNFGSK","RFPGYDSESK","EATTEFSVDAR","EQFLDGDGWTSR"], + "MS:1001844":[1234940000.0,922790000.0,80819100.0,478714000.0,254935000.0,52841200.0,243597000.0,24581800.0,707504000.0,129063000.0,205583000.0] + } + } ``` -The individual peptides' values are stored in a table, that is defined by the respective metric cv term. In case of the feature areas, there is a column indicating the peptide and another column for the respective feature area. +The individual peptides' values are stored in a table, that is defined by the respective metric cv term describing part of the QC method. In case of "QC2 sample intensities", there is are columns indicating the peptide (required) and another column for the respective feature area representing the peptide intensities in the run. 
There are other column types defined to allow for flexible use-case adaption. ``` [Term] id: MS:4000079 @@ -47,7 +49,7 @@ relationship: has_optional_column MS:1001844 ! MS1 feature area relationship: has_optional_column MS:1001843 ! MS1 feature maximum intensity relationship: has_optional_column MS:1003085 ! previous MSn-1 scan precursor intensity ``` -Since each column is in turn defined by a cv term, the column can also be assigned an expected value type and unit. In this case the feature area column is expected to contain values of `MS:1001844 - MS1 feature area`s. This concept allows for easier automated metric consumption and even generic plotting of graphs. With a collection consecutive QC2 sample mzQC files, a plot like a Levey-Jennings Control Chart are easily achieved. +Since each column is in turn defined by a cv term, the column can also be assigned an expected value type and unit. In this example's case the feature area column is expected to contain values of `MS:1001844 - MS1 feature area`s. This concept allows for easier automated metric consumption and even generic plotting of graphs. With a collection consecutive QC2 sample mzQC files, a plot like a Levey-Jennings Control Chart are easily achieved. 
![Levey-Jennings Control Chart](../../pages/figures/LJCC.png) diff --git a/specification_documents/examples/QC2-sample-example.mzQC b/specification_documents/examples/QC2-sample-example.mzQC index 21e8b2c..c313090 100644 --- a/specification_documents/examples/QC2-sample-example.mzQC +++ b/specification_documents/examples/QC2-sample-example.mzQC @@ -1,5 +1,5 @@ -{ - "mzQC": { +{"mzQC": + { "creationDate": "2020-12-03T19:51:02Z", "version": "1.0.0", "contactName": "Mathias Walzer", @@ -11,7 +11,7 @@ "inputFiles": [ { "location": "file://tmp/QC2_18052020.mzML", - "name": "QC type 2 sample", + "name": "QC2_18052020.mzML", "fileFormat": { "accession": "MS:1000584", "name": "mzML format" @@ -33,20 +33,30 @@ "value": "LTQ Orbitrap Velos" } ] + }, + { + "location": "file://tmp/QC2_18052020.mzid", + "name": "QC2_18052020", + "fileFormat": { + "accession": "MS:1002073", + "name": "mzIdentML format" + } } ], "analysisSoftware": [ { "accession": "MS:1001058", "name": "quality estimation by manual validation", + "description": "The quality estimation was done manually.", "version": "0", "uri": "https://dx.doi.org/10.1021/pr201071t" }, { "accession": "MS:1000799", "name": "custom unreleased software tool", - "version": "0", + "description": "A software tool that has not yet been released. The value should describe the software. 
Please do not use this term for publicly available software - contact the PSI-MS working group in order to have another CV term added.", "value": "mzqc-pylib", + "version": "0", "uri": "https://hupo-psi.github.io/mzQC/unknown.html" } ] @@ -55,26 +65,47 @@ { "accession": "MS:4000060", "name": "number of MS2 spectra", - "value": 62299 + "description": "The number of MS2 events in the run.", + "value": 62299, + "unit": { + "accession": "UO:0000189", + "name": "count unit" + } }, { "accession": "MS:1003251", "name": "count of identified spectra", - "value": 24765 + "description": "The number of spectra that pass the threshold to be considered identified with sufficient confidence.", + "value": 24765, + "unit": { + "accession": "UO:0000189", + "name": "count unit" + } }, { "accession": "MS:1003250", "name": "count of identified peptidoforms", - "value": 22241 + "description": "The number of peptidoforms that pass the threshold to be considered identified with sufficient confidence.", + "value": 22241, + "unit": { + "accession": "UO:0000189", + "name": "count unit" + } }, { "accession": "MS:1002404", "name": "count of identified proteins", - "value": "5504" + "description": "The number of proteins that have been identified, which must match the number of groups that pass the threshold in the file.", + "value": "5504", + "unit": { + "accession": "UO:0000189", + "name": "count unit" + } }, { "accession": "MS:4000078", "name": "QC2 sample mass accuracies", + "description": "Observed mass accuracy for the peptides of a QC2 sample measurement. The table should contain the peptides as described in the QC2 sample metric term, missing are interpreted as not detected.", "value": { "MS:1003169": [ "YAEAVTR", @@ -107,6 +138,7 @@ { "accession": "MS:4000079", "name": "QC2 sample intensities", + "description": "Observed intensities for the peptides of a QC2 sample measurement within 5 ppm and +/- 240 s RT tolerance. 
Different metrics of observed intensities are possible, at least one must be present. The table should contain the peptides as defined in the parent QC2 sample metric term, missing are interpreted as not detected.", "value": { "MS:1003169": [ "YAEAVTR", @@ -142,9 +174,9 @@ "controlledVocabularies": [ { "name": "Proteomics Standards Initiative Mass Spectrometry Ontology", - "uri": "https://github.com/HUPO-PSI/psi-ms-CV/releases/download/v4.1.79/psi-ms.obo", - "version": "4.1.79" + "uri": "https://github.com/HUPO-PSI/psi-ms-CV/releases/download/v4.1.157/psi-ms.obo", + "version": "4.1.157" } ] - } -} + } +} \ No newline at end of file From 9a5f2c888bbd42fb347e23c65e5511903f537c6e Mon Sep 17 00:00:00 2001 From: mwalzer Date: Tue, 23 Jul 2024 15:29:03 +0100 Subject: [PATCH 2/7] QC2 sample example markdown formatting --- .../worked-examples/QC2-sample-example.mzQC.md | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/pages/worked-examples/QC2-sample-example.mzQC.md b/docs/pages/worked-examples/QC2-sample-example.mzQC.md index 72c6099..d8891b0 100644 --- a/docs/pages/worked-examples/QC2-sample-example.mzQC.md +++ b/docs/pages/worked-examples/QC2-sample-example.mzQC.md @@ -12,7 +12,9 @@ Optional (detailed) descriptions about the file can be placed into mzQC next to ``` "description": "This is an example of an mzQC file produced from a proteomics QC2 sample. 20 ug dried Pierce HeLa protein digest standard from Thermo Fisher Scientific (Part number: 88329) are dissolved in 200 uL of 0.1% formic acid in water to a final concentration of 100 ng/uL. A total amount of 1 uL (100ng) is injected per analysis.", ``` -The metrics describe simple values like the cornerstone numbers of the acquisition and identification processes, but also information specific to the QC method employed. Here, it is the mass accuracies and MS1 feature areas of selected peptides. 
With regularly measured runs from samples like this, the instrument's performance can be reliably monitored and maintenance interventions scheduled before valuable samples are wasted on an instrument running at sub-par performance. +The metrics describe simple values like the cornerstone numbers of the acquisition and identification processes, but also information specific to the QC method employed. +Here, it is the mass accuracies and MS1 feature areas of selected peptides. +With regularly measured runs from samples like this, the instrument's performance can be reliably monitored and maintenance interventions scheduled before valuable samples are wasted on an instrument running at sub-par performance. ``` { "accession":"MS:4000078", @@ -33,7 +35,9 @@ The metrics describe simple values like the cornerstone numbers of the acquisiti } } ``` -The individual peptides' values are stored in a table, that is defined by the respective metric cv term describing part of the QC method. In case of "QC2 sample intensities", there is are columns indicating the peptide (required) and another column for the respective feature area representing the peptide intensities in the run. There are other column types defined to allow for flexible use-case adaption. +The individual peptides' values are stored in a table, that is defined by the respective metric cv term describing part of the QC method. +In case of "QC2 sample intensities", there is are columns indicating the peptide (required) and another column for the respective feature area representing the peptide intensities in the run. +There are other column types defined to allow for flexible use-case adaption. ``` [Term] id: MS:4000079 @@ -49,7 +53,10 @@ relationship: has_optional_column MS:1001844 ! MS1 feature area relationship: has_optional_column MS:1001843 ! MS1 feature maximum intensity relationship: has_optional_column MS:1003085 ! 
previous MSn-1 scan precursor intensity ``` -Since each column is in turn defined by a cv term, the column can also be assigned an expected value type and unit. In this example's case the feature area column is expected to contain values of `MS:1001844 - MS1 feature area`s. This concept allows for easier automated metric consumption and even generic plotting of graphs. With a collection consecutive QC2 sample mzQC files, a plot like a Levey-Jennings Control Chart are easily achieved. +Since each column is in turn defined by a cv term, the column can also be assigned an expected value type and unit. +In this example's case the feature area column is expected to contain values of `MS:1001844 - MS1 feature area`s. +This concept allows for easier automated metric consumption and even generic plotting of graphs. +With a collection consecutive QC2 sample mzQC files, a plot like a Levey-Jennings Control Chart are easily achieved. ![Levey-Jennings Control Chart](../../pages/figures/LJCC.png) From 24344652d3f3146da49cac01bf9741817b9f135c Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Tue, 30 Jul 2024 17:00:36 +0200 Subject: [PATCH 3/7] Update example --- docs/pages/examples.md | 2 +- .../QC2-sample-example.mzQC.md | 229 +++++++++++++++--- .../examples/QC2-sample-example.mzQC | 10 +- 3 files changed, 199 insertions(+), 42 deletions(-) diff --git a/docs/pages/examples.md b/docs/pages/examples.md index bd09aec..5291938 100644 --- a/docs/pages/examples.md +++ b/docs/pages/examples.md @@ -8,7 +8,7 @@ Here are a number of worked examples, that, each for its own use-case, go step-b - [Single mass spectrometry run](intro_run/) - [Sets of runs](set-of-runs/) -- [QC sample mzQC](QC2-sample-example/) +- [Tracking instrument performance using QC samples](intro_qc2/) - [in mzML](mzml-mzqc-example/) - [Using USI with mzQC](USI-example/) - [Batch correction](metabo-batches/) diff --git a/docs/pages/worked-examples/QC2-sample-example.mzQC.md 
b/docs/pages/worked-examples/QC2-sample-example.mzQC.md index d8891b0..ff95b0e 100644 --- a/docs/pages/worked-examples/QC2-sample-example.mzQC.md +++ b/docs/pages/worked-examples/QC2-sample-example.mzQC.md @@ -1,43 +1,193 @@ --- layout: page -title: "QC Sample-Run Example of mzQC" -permalink: /examples/QC2-sample-example/ +title: "Introduction to mzQC – Tracking Instrument Performance" +permalink: /examples/intro_qc2/ --- -Here, we describe details of a mzQC JSON document used for a QC sample mass spectrometry run. -For description of the general structure of mzQC, see the Single-Run Example of mzQC. -Find the complete file at the bottom of this document or in the example folder. -The mzQC file is made from the acquision of a QC2 sample as described in [QCloud: A cloud-based quality control system for mass spectrometry-based proteomics laboratories](https://doi.org/10.1371/journal.pone.0189209). -Optional (detailed) descriptions about the file can be placed into mzQC next to the general information about the file. -``` +This document outlines the utilization of an mzQC file for quality control (QC) of a mass spectrometry proteomics experiment. +The mzQC file discussed here is derived from a QC2 sample, following protocols established in the publication, [QCloud: A cloud-based quality control system for mass spectrometry-based proteomics laboratories](https://doi.org/10.1371/journal.pone.0189209). +A QC2 sample is defined as a high complexity sample that mimics real samples analysed in a proteomics laboratory, and is meant to be injected 1–5 times per week as a sample to test system suitability. + +Here we demonstrate how real-life QC metrics are calculated for a single mass spectrometry run using tools such as QCloud. +You can view the complete structure of this mzQC example [here](https://github.com/HUPO-PSI/mzQC/tree/main/specification_documents/examples/QC2-sample-example.mzQC). 
+ +## File description + +Our mzQC file example provides a detailed description of its contents and purpose, which allows users to understand the context and relevance of the QC metrics: + +```json "description": "This is an example of an mzQC file produced from a proteomics QC2 sample. 20 ug dried Pierce HeLa protein digest standard from Thermo Fisher Scientific (Part number: 88329) are dissolved in 200 uL of 0.1% formic acid in water to a final concentration of 100 ng/uL. A total amount of 1 uL (100ng) is injected per analysis.", ``` -The metrics describe simple values like the cornerstone numbers of the acquisition and identification processes, but also information specific to the QC method employed. -Here, it is the mass accuracies and MS1 feature areas of selected peptides. -With regularly measured runs from samples like this, the instrument's performance can be reliably monitored and maintenance interventions scheduled before valuable samples are wasted on an instrument running at sub-par performance. -``` + +## Input files specification + +The mzQC file lists necessary input files including both the raw mass spectrometry data file (mzML) and the peptide identification data file (mzIdentML). +The latter is required for deriving ID-based QC metrics later on. + +```json +"inputFiles": [ { - "accession":"MS:4000078", - "name":"QC2 sample mass accuracies", - "description": "Observed mass accuracy for the peptides of a QC2 sample measurement. 
The table should contain the peptides as described in the QC2 sample metric term, missing are interpreted as not detected.", - "value":{ - "MS:1003169":["YAEAVTR","STLTDSLVC(Carbamidomethyl)K","SLADELALVDVLEDK","NPDDITNEEYGEFYK","LAVDEEENADNNTK","FEELNMDLFR","EAALSTALSEK","DDVAQTDLLQIDPNFGSK","RFPGYDSESK","EATTEFSVDAR","EQFLDGDGWTSR"], - "MS:4000072":[-0.2346854518740762,-0.08024023890884578,-0.1322012562867409,-0.2259441806378488,-0.10596535779273217,0.28345130855013684,-0.08600783742175504,-0.3683484942567654,-0.03348194493295555,-0.41789282666789496,-0.12794363836212685] - } + "location": "file://tmp/QC2_18052020.mzML", + "name": "QC2_18052020", + "fileFormat": { + "accession": "MS:1000584", + "name": "mzML format" + }, + "fileProperties": [ + { + "accession": "MS:1000747", + "name": "completion time", + "value": "2020-05-18 09:20:48" + }, + { + "accession": "MS:1000569", + "name": "SHA-1", + "value": "fbe692c887404179518089abc670484c" + }, + { + "accession": "MS:1000031", + "name": "instrument model", + "value": "LTQ Orbitrap Velos" + } + ] }, { - "accession":"MS:4000079", - "name":"QC2 sample intensities", - "description": "Observed intensities for the peptides of a QC2 sample measurement within 5 ppm and +/- 240 s RT tolerance. Different metrics of observed intensities are possible, at least one must be present. 
The table should contain the peptides as defined in the parent QC2 sample metric term, missing are interpreted as not detected.", - "value":{ - "MS:1003169":["YAEAVTR","STLTDSLVC(Carbamidomethyl)K","SLADELALVDVLEDK","NPDDITNEEYGEFYK","LAVDEEENADNNTK","FEELNMDLFR","EAALSTALSEK","DDVAQTDLLQIDPNFGSK","RFPGYDSESK","EATTEFSVDAR","EQFLDGDGWTSR"], - "MS:1001844":[1234940000.0,922790000.0,80819100.0,478714000.0,254935000.0,52841200.0,243597000.0,24581800.0,707504000.0,129063000.0,205583000.0] - } + "location": "file://tmp/QC2_18052020.mzid", + "name": "QC2_18052020", + "fileFormat": { + "accession": "MS:1002073", + "name": "mzIdentML format" + } + } +], +``` + +## Metrics calculation + +First, the mzQC file includes single-value metrics that provide quantifiable data on the MS data acquisition process, such as the number of MS2 spectra, identified spectra, peptides, and proteins: + +```json +{ + "accession": "MS:4000060", + "name": "number of MS2 spectra", + "description": "The number of MS2 events in the run.", + "value": 62299, + "unit": { + "accession": "UO:0000189", + "name": "count unit" + } +}, +{ + "accession": "MS:1003251", + "name": "count of identified spectra", + "description": "The number of spectra that pass the threshold to be considered identified with sufficient confidence.", + "value": 24765, + "unit": { + "accession": "UO:0000189", + "name": "count unit" + } +}, +{ + "accession": "MS:1003250", + "name": "count of identified peptidoforms", + "description": "The number of peptidoforms that pass the threshold to be considered identified with sufficient confidence.", + "value": 22241, + "unit": { + "accession": "UO:0000189", + "name": "count unit" } +}, +{ + "accession": "MS:1002404", + "name": "count of identified proteins", + "description": "The number of proteins that have been identified, which must match the number of groups that pass the threshold in the file.", + "value": "5504", + "unit": { + "accession": "UO:0000189", + "name": "count unit" + } +}, 
+``` + +Next, the file includes metrics on precursor mass accuracies and sample intensities for selected peptides from the QC2 sample: + +```json +{ + "accession": "MS:4000078", + "name": "QC2 sample mass accuracies", + "description": "Observed mass accuracy for the peptides of a QC2 sample measurement. The table should contain the peptides as described in the QC2 sample metric term, missing are interpreted as not detected.", + "value": { + "MS:1003169": [ + "YAEAVTR", + "STLTDSLVC[Carbamidomethyl]K", + "SLADELALVDVLEDK", + "NPDDITNEEYGEFYK", + "LAVDEEENADNNTK", + "FEELNMDLFR", + "EAALSTALSEK", + "DDVAQTDLLQIDPNFGSK", + "RFPGYDSESK", + "EATTEFSVDAR", + "EQFLDGDGWTSR" + ], + "MS:4000072": [ + -0.2346854518740762, + -0.08024023890884578, + -0.1322012562867409, + -0.2259441806378488, + -0.10596535779273217, + 0.28345130855013684, + -0.08600783742175504, + -0.3683484942567654, + -0.03348194493295555, + -0.41789282666789496, + -0.12794363836212685 + ] + } +}, +{ + "accession": "MS:4000079", + "name": "QC2 sample intensities", + "description": "Observed intensities for the peptides of a QC2 sample measurement within 5 ppm and +/- 240 s RT tolerance. Different metrics of observed intensities are possible, at least one must be present. The table should contain the peptides as defined in the parent QC2 sample metric term, missing are interpreted as not detected.", + "value": { + "MS:1003169": [ + "YAEAVTR", + "STLTDSLVC[Carbamidomethyl]K", + "SLADELALVDVLEDK", + "NPDDITNEEYGEFYK", + "LAVDEEENADNNTK", + "FEELNMDLFR", + "EAALSTALSEK", + "DDVAQTDLLQIDPNFGSK", + "RFPGYDSESK", + "EATTEFSVDAR", + "EQFLDGDGWTSR" + ], + "MS:1001844": [ + 1234940000, + 922790000, + 80819100, + 478714000, + 254935000, + 52841200, + 243597000, + 24581800, + 707504000, + 129063000, + 205583000 + ] + } +} ``` -The individual peptides' values are stored in a table, that is defined by the respective metric cv term describing part of the QC method. 
-In case of "QC2 sample intensities", there is are columns indicating the peptide (required) and another column for the respective feature area representing the peptide intensities in the run. -There are other column types defined to allow for flexible use-case adaption. + +These metrics are structured as tables within the mzQC document, with each row representing a peptide and columns detailing the specific metric values. + +## Controlled vocabulary definition + +All QC metrics in an mzQC file should be backed by a formal definition in a controlled vocabulary (CV) or ontology. +By default, mzQC sources its metrics from the [PSI-MS CV](https://github.com/HUPO-PSI/psi-ms-CV/). +For example, the "QC2 sample intensities" metric is formally defined in the PSI-MS CV as follows: + ``` [Term] id: MS:4000079 @@ -53,13 +203,20 @@ relationship: has_optional_column MS:1001844 ! MS1 feature area relationship: has_optional_column MS:1001843 ! MS1 feature maximum intensity relationship: has_optional_column MS:1003085 ! previous MSn-1 scan precursor intensity ``` -Since each column is in turn defined by a cv term, the column can also be assigned an expected value type and unit. -In this example's case the feature area column is expected to contain values of `MS:1001844 - MS1 feature area`s. -This concept allows for easier automated metric consumption and even generic plotting of graphs. -With a collection consecutive QC2 sample mzQC files, a plot like a Levey-Jennings Control Chart are easily achieved. -![Levey-Jennings Control Chart](../../pages/figures/LJCC.png) +This CV term is structured to capture multiple aspects of peptide detection and quantification, which include: + +- [ProForma](https://github.com/HUPO-PSI/ProForma) peptidoform sequence: Mandatory column denoting the peptides that were detected. +- Abundance measurements: Various optional columns can be used to record the peptide abundances using different strategies. 
Typically only one of those optional columns will be present. +The example above records the peptide intensities based on the MS1 feature areas. Thus, the second column of the metric is named `MS:1001844` (MS1 feature area), matching the corresponding optional column in the term definition. + +## Visualization and data analysis + +The structured data in mzQC allows for effective visualization and analysis, such as plotting trends across multiple peptides, samples, or experiments. +This can help identify any deviations or potential issues with the mass spectrometry process, prompting timely maintenance and calibration actions to maintain optimal performance. +For example, Levey-Jennings charts can be used to enable quick visual assessment of instrument stability or drift, critical for high-stakes or high-throughput proteomics workflows: + +![Levey-Jennings Control Chart](../../pages/figures/LJCC.png) -### This is the mzQC file once again, in full: -**[QC2-sample-example.mzQC](https://github.com/HUPO-PSI/mzQC/tree/main/specification_documents/examples/QC2-sample-example.mzQC)** \ No newline at end of file +This example demonstrates how QC information in mzQC files helps in monitoring instrument performance, ensuring that maintenance is proactive and timely, thereby preserving the integrity and effectiveness of subsequent analyses. 
diff --git a/specification_documents/examples/QC2-sample-example.mzQC b/specification_documents/examples/QC2-sample-example.mzQC index c313090..0d0aa3c 100644 --- a/specification_documents/examples/QC2-sample-example.mzQC +++ b/specification_documents/examples/QC2-sample-example.mzQC @@ -1,5 +1,5 @@ -{"mzQC": - { +{ + "mzQC": { "creationDate": "2020-12-03T19:51:02Z", "version": "1.0.0", "contactName": "Mathias Walzer", @@ -11,7 +11,7 @@ "inputFiles": [ { "location": "file://tmp/QC2_18052020.mzML", - "name": "QC2_18052020.mzML", + "name": "QC2_18052020", "fileFormat": { "accession": "MS:1000584", "name": "mzML format" @@ -178,5 +178,5 @@ "version": "4.1.157" } ] - } -} \ No newline at end of file + } +} From d1ce284fc2d0066e6af6477121be52dfd142d329 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Tue, 30 Jul 2024 17:45:39 +0200 Subject: [PATCH 4/7] Update file names --- docs/pages/examples.md | 2 +- docs/pages/figures/{LJCC.png => intro_qc2_ljcc.png} | Bin .../{QC2-sample-example.mzQC.md => intro_qc2.md} | 4 ++-- .../{QC2-sample-example.mzQC => intro_qc2.mzQC} | 0 4 files changed, 3 insertions(+), 3 deletions(-) rename docs/pages/figures/{LJCC.png => intro_qc2_ljcc.png} (100%) rename docs/pages/worked-examples/{QC2-sample-example.mzQC.md => intro_qc2.md} (98%) rename specification_documents/examples/{QC2-sample-example.mzQC => intro_qc2.mzQC} (100%) diff --git a/docs/pages/examples.md b/docs/pages/examples.md index 4d87651..9488d86 100644 --- a/docs/pages/examples.md +++ b/docs/pages/examples.md @@ -8,7 +8,7 @@ The following use cases provide several hands-on examples of how mzQC files are - [Representing QC data for an individual mass spectrometry run](intro_run/) - [Deriving QC data from multiple related mass spectrometry runs](intro_set/) -- [Tracking instrument performance using QC samples](intro_qc2/) +- [Tracking instrument performance using controlled QC samples](intro_qc2/) - [Batch correction](metabo-batches/) Additionally, for more advanced usage, 
mzQC can closely interoperate with several other file formats developed by the Proteomics Standards Initiative: diff --git a/docs/pages/figures/LJCC.png b/docs/pages/figures/intro_qc2_ljcc.png similarity index 100% rename from docs/pages/figures/LJCC.png rename to docs/pages/figures/intro_qc2_ljcc.png diff --git a/docs/pages/worked-examples/QC2-sample-example.mzQC.md b/docs/pages/worked-examples/intro_qc2.md similarity index 98% rename from docs/pages/worked-examples/QC2-sample-example.mzQC.md rename to docs/pages/worked-examples/intro_qc2.md index ff95b0e..ab26bd5 100644 --- a/docs/pages/worked-examples/QC2-sample-example.mzQC.md +++ b/docs/pages/worked-examples/intro_qc2.md @@ -9,7 +9,7 @@ The mzQC file discussed here is derived from a QC2 sample, following protocols e A QC2 sample is defined as a high complexity sample that mimics real samples analysed in a proteomics laboratory, and is meant to be injected 1–5 times per week as a sample to test system suitability. Here we demonstrate how real-life QC metrics are calculated for a single mass spectrometry run using tools such as QCloud. -You can view the complete structure of this mzQC example [here](https://github.com/HUPO-PSI/mzQC/tree/main/specification_documents/examples/QC2-sample-example.mzQC). +You can view the complete structure of this mzQC example [here](https://github.com/HUPO-PSI/mzQC/tree/main/specification_documents/examples/intro_qc2.mzQC). ## File description @@ -217,6 +217,6 @@ The structured data in mzQC allows for effective visualization and analysis, suc This can help identify any deviations or potential issues with the mass spectrometry process, prompting timely maintenance and calibration actions to maintain optimal performance. 
For example, Levey-Jennings charts can be used to enable quick visual assessment of instrument stability or drift, critical for high-stakes or high-throughput proteomics workflows: -![Levey-Jennings Control Chart](../../pages/figures/LJCC.png) +![Levey-Jennings control chart](../../pages/figures/intro_qc2_ljcc.png) This example demonstrates how QC information in mzQC files helps in monitoring instrument performance, ensuring that maintenance is proactive and timely, thereby preserving the integrity and effectiveness of subsequent analyses. diff --git a/specification_documents/examples/QC2-sample-example.mzQC b/specification_documents/examples/intro_qc2.mzQC similarity index 100% rename from specification_documents/examples/QC2-sample-example.mzQC rename to specification_documents/examples/intro_qc2.mzQC From 8d3a181d98b155506ed512e073698d88ea97b3dc Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Tue, 30 Jul 2024 17:54:57 +0200 Subject: [PATCH 5/7] Spelling --- docs/pages/worked-examples/intro_qc2.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/pages/worked-examples/intro_qc2.md b/docs/pages/worked-examples/intro_qc2.md index ab26bd5..3ca679f 100644 --- a/docs/pages/worked-examples/intro_qc2.md +++ b/docs/pages/worked-examples/intro_qc2.md @@ -6,7 +6,7 @@ permalink: /examples/intro_qc2/ This document outlines the utilization of an mzQC file for quality control (QC) of a mass spectrometry proteomics experiment. The mzQC file discussed here is derived from a QC2 sample, following protocols established in the publication, [QCloud: A cloud-based quality control system for mass spectrometry-based proteomics laboratories](https://doi.org/10.1371/journal.pone.0189209). -A QC2 sample is defined as a high complexity sample that mimics real samples analysed in a proteomics laboratory, and is meant to be injected 1–5 times per week as a sample to test system suitability. 
+A QC2 sample is defined as a high complexity sample that mimics real samples analyzed in a proteomics laboratory, and is meant to be injected 1–5 times per week as a sample to test system suitability. Here we demonstrate how real-life QC metrics are calculated for a single mass spectrometry run using tools such as QCloud. You can view the complete structure of this mzQC example [here](https://github.com/HUPO-PSI/mzQC/tree/main/specification_documents/examples/intro_qc2.mzQC). From 6c6f9278fea5b72d6ef35177c003ebd94fdd1302 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Wed, 7 Aug 2024 17:04:57 +0200 Subject: [PATCH 6/7] Fix duplicate file names --- docs/pages/worked-examples/intro_qc2.md | 4 ++-- specification_documents/examples/intro_qc2.mzQC | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/pages/worked-examples/intro_qc2.md b/docs/pages/worked-examples/intro_qc2.md index 3ca679f..157a006 100644 --- a/docs/pages/worked-examples/intro_qc2.md +++ b/docs/pages/worked-examples/intro_qc2.md @@ -28,7 +28,7 @@ The latter is required for deriving ID-based QC metrics later on. "inputFiles": [ { "location": "file://tmp/QC2_18052020.mzML", - "name": "QC2_18052020", + "name": "QC2_18052020_mzML", "fileFormat": { "accession": "MS:1000584", "name": "mzML format" @@ -53,7 +53,7 @@ The latter is required for deriving ID-based QC metrics later on. 
}, { "location": "file://tmp/QC2_18052020.mzid", - "name": "QC2_18052020", + "name": "QC2_18052020_mzId", "fileFormat": { "accession": "MS:1002073", "name": "mzIdentML format" diff --git a/specification_documents/examples/intro_qc2.mzQC b/specification_documents/examples/intro_qc2.mzQC index 0d0aa3c..1d71aaa 100644 --- a/specification_documents/examples/intro_qc2.mzQC +++ b/specification_documents/examples/intro_qc2.mzQC @@ -11,7 +11,7 @@ "inputFiles": [ { "location": "file://tmp/QC2_18052020.mzML", - "name": "QC2_18052020", + "name": "QC2_18052020_mzML", "fileFormat": { "accession": "MS:1000584", "name": "mzML format" @@ -36,7 +36,7 @@ }, { "location": "file://tmp/QC2_18052020.mzid", - "name": "QC2_18052020", + "name": "QC2_18052020_mzId", "fileFormat": { "accession": "MS:1002073", "name": "mzIdentML format" From 39bef2e6d9b1de8e6d8fdd8b13547a778463ffd9 Mon Sep 17 00:00:00 2001 From: Wout Bittremieux Date: Wed, 21 Aug 2024 10:31:45 +0200 Subject: [PATCH 7/7] Update CV version --- specification_documents/examples/intro_qc2.mzQC | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification_documents/examples/intro_qc2.mzQC b/specification_documents/examples/intro_qc2.mzQC index 1d71aaa..2761772 100644 --- a/specification_documents/examples/intro_qc2.mzQC +++ b/specification_documents/examples/intro_qc2.mzQC @@ -175,7 +175,7 @@ { "name": "Proteomics Standards Initiative Mass Spectrometry Ontology", "uri": "https://github.com/HUPO-PSI/psi-ms-CV/releases/download/v4.1.157/psi-ms.obo", - "version": "4.1.157" + "version": "4.1.172" } ] }