diff --git a/LOG.md b/LOG.md new file mode 100644 index 0000000..fbaf79c --- /dev/null +++ b/LOG.md @@ -0,0 +1,8 @@ +### Version Updates + +#### v2.1.0 + * Added support for updated QCs, to enable the new generic schema ``quality_metric_generic`` + + +#### v2.0.0 + * Initial release after major changes to support the new YAML format for portal objects diff --git a/docs/conf.py b/docs/conf.py index cc806d8..9565327 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ author = 'Michele Berselli, CGAP & SMaHT Team' # The full version, including alpha/beta/rc tags -release = '2.0.0' +release = '2.1.0' # -- General configuration --------------------------------------------------- diff --git a/docs/yaml_workflow.rst b/docs/yaml_workflow.rst index 4282522..7eb360f 100644 --- a/docs/yaml_workflow.rst +++ b/docs/yaml_workflow.rst @@ -63,6 +63,8 @@ Template argument_type: qc. # qc_type, e.g. quality_metric_vcfcheck # none can be used as # if a qc_type is not defined + # quality_metric_generic can be used as + # to use the general qc_type instead of a custom one argument_to_be_attached_to: # All the following fields are optional and provided as example, # can be expanded to anything accepted by the schema @@ -164,11 +166,18 @@ Definition of the type of the output. For a **file** output, the argument type is defined as ``file.``, where ```` is the format used by the file. ```` needs to match a file format that has been previously defined, see :ref:`File Format `. -For a **QC** (Quality Control) output, the argument type is defined as ``qc.``, where ```` is a a ``qc_type`` defined in the the schema, see `schemas `__. - For a **report** output, the argument type is defined as ``report.``, where ```` is the type of the report (e.g., file). -*Note*: We are currently re-thinking how QC and report outputs work, the current definitions are temporary solutions that may change soon. 
+For a **QC** (Quality Control) output, the argument type is defined as ``qc.<qc_type>``, where ``<qc_type>`` is a ``qc_type`` defined in the schema, see `schemas `__. +While custom ``qc_type`` schemas are still supported for compatibility, we introduced a new generic type ``quality_metric_generic``. +We recommend using this new type to implement QCs. + +When using ``quality_metric_generic`` as a ``qc_type``, it is possible to generate two different types of output: a JSON file of key-value pairs and a compressed file. +The JSON file can be used to create a summary report of the quality metrics generated by the QC process. +The compressed file can be used to store the original output for the QC, including additional data or graphs. +Both the JSON file and compressed file will be attached to the file specified as target by ``argument_to_be_attached_to``. +The content of the JSON file will be patched directly on the target file, while the compressed file will be made available for download on the file via a link. +The output type can be specified by setting ``json: True`` or ``zipped: True`` in the QC output definition. 
secondary_files ^^^^^^^^^^^^^^^ diff --git a/pipeline_utils/lib/yaml_parser.py b/pipeline_utils/lib/yaml_parser.py index 3282c00..f24f71c 100644 --- a/pipeline_utils/lib/yaml_parser.py +++ b/pipeline_utils/lib/yaml_parser.py @@ -178,8 +178,10 @@ class YAMLWorkflow(YAMLTemplate): INPUT_FILE_SCHEMA = 'Input file' OUTPUT_PROCESSED_FILE_SCHEMA = 'Output processed file' OUTPUT_QC_FILE_SCHEMA = 'Output QC file' + GENERIC_QC_FILE_SCHEMA = 'Generic QC file' OUTPUT_REPORT_FILE_SCHEMA = 'Output report file' QC_SCHEMA = 'qc' + QUALITY_METRIC_GENERIC_SCHEMA = 'quality_metric_generic' REPORT_SCHEMA = 'report' ARGUMENT_TO_BE_ATTACHED_TO_SCHEMA = 'argument_to_be_attached_to' ZIPPED_SCHEMA = 'zipped' @@ -253,7 +255,12 @@ def _arguments_output(self): self.SECONDARY_FILE_FORMATS_SCHEMA: values.get(self.SECONDARY_FILES_SCHEMA, []) } elif type == self.QC_SCHEMA: - argument_type = self.OUTPUT_QC_FILE_SCHEMA + # handle generic vs specific QC schema + if format == self.QUALITY_METRIC_GENERIC_SCHEMA: + argument_type = self.GENERIC_QC_FILE_SCHEMA + else: + argument_type = self.OUTPUT_QC_FILE_SCHEMA + # create base QC argument argument_ = { self.ARGUMENT_TYPE_SCHEMA: argument_type, self.WORKFLOW_ARGUMENT_NAME_SCHEMA: name, @@ -263,9 +270,15 @@ def _arguments_output(self): self.QC_JSON_SCHEMA: values.get(self.JSON_SCHEMA, False), self.QC_TABLE_SCHEMA: values.get(self.TABLE_SCHEMA, False) } - # handle edge case for missing QC type - if format not in ['none']: + # handle edge case for missing or generic QC type + if format not in ['none', self.QUALITY_METRIC_GENERIC_SCHEMA]: argument_[self.QC_TYPE_SCHEMA] = format + # create argument format for generic QCs (JSON or ZIP) + elif format == self.QUALITY_METRIC_GENERIC_SCHEMA: + if argument_[self.QC_JSON_SCHEMA]: + argument_[self.ARGUMENT_FORMAT_SCHEMA] = 'json' + else: + argument_[self.ARGUMENT_FORMAT_SCHEMA] = 'zip' # quality controls, TODO # these fields are bad, need to rework how QCs work if values.get(self.HTML_IN_ZIPPED_SCHEMA): @@ -371,7 
+384,7 @@ def _arguments(self, input, project): self.ARGUMENT_TYPE_SCHEMA: type } if type == self.PARAMETER_SCHEMA: - argument_.setdefault(self.VALUE_TYPE_SCHEMA, format) + argument_[self.VALUE_TYPE_SCHEMA] = format for k, v in values.items(): if k != self.ARGUMENT_TYPE_SCHEMA: # handle files specifications, TODO diff --git a/poetry.lock b/poetry.lock index 0af5b7c..db09ac3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,23 +1,26 @@ -# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. [[package]] name = "attrs" -version = "22.2.0" +version = "23.1.0" description = "Classes Without Boilerplate" category = "main" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ - {file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"}, - {file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"}, + {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, + {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, ] +[package.dependencies] +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} + [package.extras] -cov = ["attrs[tests]", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"] -dev = ["attrs[docs,tests]"] -docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope.interface"] -tests = ["attrs[tests-no-zope]", "zope.interface"] -tests-no-zope = ["cloudpickle", "cloudpickle", "hypothesis", "hypothesis", "mypy (>=0.971,<0.990)", "mypy (>=0.971,<0.990)", "pympler", "pympler", "pytest (>=4.3.0)", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-mypy-plugins", "pytest-xdist[psutil]", "pytest-xdist[psutil]"] 
+cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] +dev = ["attrs[docs,tests]", "pre-commit"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] +tests = ["attrs[tests-no-zope]", "zope-interface"] +tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] [[package]] name = "aws-requests-auth" @@ -36,18 +39,18 @@ requests = ">=0.14.0" [[package]] name = "awscli" -version = "1.27.101" +version = "1.27.131" description = "Universal Command Line Environment for AWS." category = "main" optional = false python-versions = ">= 3.7" files = [ - {file = "awscli-1.27.101-py3-none-any.whl", hash = "sha256:6780a917adb60d9ee4d2ca9f25b833baacad6344d53c50274360396ed1782980"}, - {file = "awscli-1.27.101.tar.gz", hash = "sha256:c4ec0ba025f2310be62686630f54c6c28ffce1dc54e730f42329355ea5a9117a"}, + {file = "awscli-1.27.131-py3-none-any.whl", hash = "sha256:97375432647cc7049c08d9418ff5b513329d4feaff8a855243f305f0eac5831c"}, + {file = "awscli-1.27.131.tar.gz", hash = "sha256:e310b1c8b983d2e8aad51ba9dbca05c731b92209d5a3f88634ae4bc931f288ee"}, ] [package.dependencies] -botocore = "1.29.101" +botocore = "1.29.131" colorama = ">=0.2.5,<0.4.5" docutils = ">=0.10,<0.17" PyYAML = ">=3.10,<5.5" @@ -56,14 +59,14 @@ s3transfer = ">=0.6.0,<0.7.0" [[package]] name = "beautifulsoup4" -version = "4.12.0" +version = "4.12.2" description = "Screen-scraping library" category = "main" optional = false python-versions = ">=3.6.0" files = [ - {file = "beautifulsoup4-4.12.0-py3-none-any.whl", hash = "sha256:2130a5ad7f513200fae61a17abb5e338ca980fa28c439c0571014bc0217e9591"}, - {file = "beautifulsoup4-4.12.0.tar.gz", hash = "sha256:c5fceeaec29d09c84970e47c65f2f0efe57872f7cff494c9691a26ec0ff13234"}, + {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, + {file = 
"beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, ] [package.dependencies] @@ -75,30 +78,30 @@ lxml = ["lxml"] [[package]] name = "benchmark-4dn" -version = "0.5.21" +version = "0.5.23" description = "Benchmark functions that returns total space, mem, cpu given input size and parameters for the CWL workflows" category = "main" optional = false python-versions = ">=3.5,<4.0" files = [ - {file = "Benchmark-4dn-0.5.21.tar.gz", hash = "sha256:7b0dbe88be53ca2a629d52a64907781431fda68d23be8e08cd1d8e013eea6a8c"}, - {file = "benchmark_4dn-0.5.21-py3-none-any.whl", hash = "sha256:7420f4f712b771028af310c63723c0b21e0678edac195449831b5702d64dc191"}, + {file = "Benchmark-4dn-0.5.23.tar.gz", hash = "sha256:f17a02c5cd9669f96ea65cb31603106bb77a0a01775b4fd2983a95ab66ee7b7c"}, + {file = "benchmark_4dn-0.5.23-py3-none-any.whl", hash = "sha256:bd8aa8c252e868f1d7afa2c2003e1148f409e6dc825af5656f37cd5fec30e2c3"}, ] [[package]] name = "boto3" -version = "1.26.101" +version = "1.26.131" description = "The AWS SDK for Python" category = "main" optional = false python-versions = ">= 3.7" files = [ - {file = "boto3-1.26.101-py3-none-any.whl", hash = "sha256:5f5279a63b359ba8889e9a81b319e745b14216608ffb5a39fcbf269d1af1ea83"}, - {file = "boto3-1.26.101.tar.gz", hash = "sha256:670ae4d1875a2162e11c6e941888817c3e9cf1bb9a3335b3588d805b7d24da31"}, + {file = "boto3-1.26.131-py3-none-any.whl", hash = "sha256:5b2b13d9f3430e3d5e768bf32097d5d6d16f47a4719f2656de67da49dd3e4de1"}, + {file = "boto3-1.26.131.tar.gz", hash = "sha256:061d3270472b9be09901bb08a45e9871ac8f86a9b1c9c615535ca0223acd7582"}, ] [package.dependencies] -botocore = ">=1.29.101,<1.30.0" +botocore = ">=1.29.131,<1.30.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.6.0,<0.7.0" @@ -107,14 +110,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.29.101" +version = "1.29.131" description = "Low-level, data-driven core of boto 3." 
category = "main" optional = false python-versions = ">= 3.7" files = [ - {file = "botocore-1.29.101-py3-none-any.whl", hash = "sha256:60c7a7bf8e2a288735e507007a6769be03dc24815f7dc5c7b59b12743f4a31cf"}, - {file = "botocore-1.29.101.tar.gz", hash = "sha256:7bb60d9d4c49500df55dfb6005c16002703333ff5f69dada565167c8d493dfd5"}, + {file = "botocore-1.29.131-py3-none-any.whl", hash = "sha256:d0dea23bccdfd7c2f6d0cd3216cfbd7065bc3e9e7b1ef6fee0952b04f5d2cffd"}, + {file = "botocore-1.29.131.tar.gz", hash = "sha256:ffbd85915b2624c545438a33c2624a809593720a10648f6e757fe50be4893188"}, ] [package.dependencies] @@ -127,14 +130,14 @@ crt = ["awscrt (==0.16.9)"] [[package]] name = "certifi" -version = "2022.12.7" +version = "2023.5.7" description = "Python package for providing Mozilla's CA Bundle." category = "main" optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"}, - {file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"}, + {file = "certifi-2023.5.7-py3-none-any.whl", hash = "sha256:c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716"}, + {file = "certifi-2023.5.7.tar.gz", hash = "sha256:0f0d56dc5a6ad56fd4ba36484d6cc34451e1c6548c61daad8c320169f91eddc7"}, ] [[package]] @@ -310,14 +313,14 @@ files = [ [[package]] name = "importlib-metadata" -version = "6.1.0" +version = "6.6.0" description = "Read metadata from Python packages" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "importlib_metadata-6.1.0-py3-none-any.whl", hash = "sha256:ff80f3b5394912eb1b108fcfd444dc78b7f1f3e16b16188054bd01cb9cb86f09"}, - {file = "importlib_metadata-6.1.0.tar.gz", hash = "sha256:43ce9281e097583d758c2c708c4376371261a02c34682491a8e98352365aad20"}, + {file = "importlib_metadata-6.6.0-py3-none-any.whl", hash = 
"sha256:43dd286a2cd8995d5eaef7fee2066340423b818ed3fd70adf0bad5f1fac53fed"}, + {file = "importlib_metadata-6.6.0.tar.gz", hash = "sha256:92501cdf9cc66ebd3e612f1b4f0c0765dfa42f0fa38ffb319b6bd84dd675d705"}, ] [package.dependencies] @@ -398,14 +401,14 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- [[package]] name = "magma-suite" -version = "1.2.0" +version = "1.2.1" description = "Collection of tools to manage meta-workflows automation." category = "main" optional = false python-versions = ">=3.7,<3.9" files = [ - {file = "magma_suite-1.2.0-py3-none-any.whl", hash = "sha256:5ed7155ff6e40eb69109146750da3a6a5d843e40da66c661e8c336bd017a0877"}, - {file = "magma_suite-1.2.0.tar.gz", hash = "sha256:fb3cbe34bda0cbba3637952ff7bd2b426473e66d04190dd68a320e9d74a27cce"}, + {file = "magma_suite-1.2.1-py3-none-any.whl", hash = "sha256:d888f781f0d340edac6b14ab6d23fad6283610268dd08a65f9e84528b95a42da"}, + {file = "magma_suite-1.2.1.tar.gz", hash = "sha256:b5f42a17f367dfe991e50b7862be8f0b857ee512c44649dd9193af94655a4dda"}, ] [package.dependencies] @@ -439,14 +442,14 @@ kerberos = ["requests-kerberos"] [[package]] name = "packaging" -version = "23.0" +version = "23.1" description = "Core utilities for Python packages" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"}, - {file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"}, + {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, + {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, ] [[package]] @@ -482,14 +485,14 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "pyasn1" -version = "0.4.8" -description = "ASN.1 types and codecs" +version = "0.5.0" 
+description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" category = "main" optional = false -python-versions = "*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ - {file = "pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d"}, - {file = "pyasn1-0.4.8.tar.gz", hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"}, + {file = "pyasn1-0.5.0-py2.py3-none-any.whl", hash = "sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57"}, + {file = "pyasn1-0.5.0.tar.gz", hash = "sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde"}, ] [[package]] @@ -531,18 +534,17 @@ files = [ [[package]] name = "pytest" -version = "7.2.2" +version = "7.3.1" description = "pytest: simple powerful testing with Python" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "pytest-7.2.2-py3-none-any.whl", hash = "sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e"}, - {file = "pytest-7.2.2.tar.gz", hash = "sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4"}, + {file = "pytest-7.3.1-py3-none-any.whl", hash = "sha256:3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362"}, + {file = "pytest-7.3.1.tar.gz", hash = "sha256:434afafd78b1d78ed0addf160ad2b77a30d35d4bdf8af234fe621919d9ed15e3"}, ] [package.dependencies] -attrs = ">=19.2.0" colorama = {version = "*", markers = "sys_platform == \"win32\""} exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} @@ -552,7 +554,7 @@ pluggy = ">=0.12,<2.0" tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] -testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] +testing = ["argcomplete", "attrs 
(>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] [[package]] name = "python-dateutil" @@ -713,14 +715,14 @@ pyasn1 = ">=0.1.3" [[package]] name = "s3transfer" -version = "0.6.0" +version = "0.6.1" description = "An Amazon S3 Transfer Manager" category = "main" optional = false python-versions = ">= 3.7" files = [ - {file = "s3transfer-0.6.0-py3-none-any.whl", hash = "sha256:06176b74f3a15f61f1b4f25a1fc29a4429040b7647133a463da8fa5bd28d5ecd"}, - {file = "s3transfer-0.6.0.tar.gz", hash = "sha256:2ed07d3866f523cc561bf4a00fc5535827981b117dd7876f036b0c1aca42c947"}, + {file = "s3transfer-0.6.1-py3-none-any.whl", hash = "sha256:3c0da2d074bf35d6870ef157158641178a4204a6e689e82546083e31e0311346"}, + {file = "s3transfer-0.6.1.tar.gz", hash = "sha256:640bb492711f4c0c0905e1f62b6aaeb771881935ad27884852411f8e9cacbca9"}, ] [package.dependencies] @@ -755,14 +757,14 @@ files = [ [[package]] name = "soupsieve" -version = "2.4" +version = "2.4.1" description = "A modern CSS selector implementation for Beautiful Soup." 
category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "soupsieve-2.4-py3-none-any.whl", hash = "sha256:49e5368c2cda80ee7e84da9dbe3e110b70a4575f196efb74e51b94549d921955"}, - {file = "soupsieve-2.4.tar.gz", hash = "sha256:e28dba9ca6c7c00173e34e4ba57448f0688bb681b7c5e8bf4971daafc093d69a"}, + {file = "soupsieve-2.4.1-py3-none-any.whl", hash = "sha256:1c1bfee6819544a3447586c889157365a27e10d88cde3ad3da0cf0ddf646feb8"}, + {file = "soupsieve-2.4.1.tar.gz", hash = "sha256:89d12b2d5dfcd2c9e8c22326da9d9aa9cb3dfab0a83a024f05704076ee8d35ea"}, ] [[package]] @@ -852,14 +854,14 @@ files = [ [[package]] name = "tomlkit" -version = "0.11.7" +version = "0.11.8" description = "Style preserving TOML library" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "tomlkit-0.11.7-py3-none-any.whl", hash = "sha256:5325463a7da2ef0c6bbfefb62a3dc883aebe679984709aee32a317907d0a8d3c"}, - {file = "tomlkit-0.11.7.tar.gz", hash = "sha256:f392ef70ad87a672f02519f99967d28a4d3047133e2d1df936511465fbb3791d"}, + {file = "tomlkit-0.11.8-py3-none-any.whl", hash = "sha256:8c726c4c202bdb148667835f68d68780b9a003a9ec34167b6c673b38eff2a171"}, + {file = "tomlkit-0.11.8.tar.gz", hash = "sha256:9330fc7faa1db67b541b28e62018c17d20be733177d290a13b24c62d1614e0c3"}, ] [[package]]