diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 06fa6805..1fcaa542 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -116,4 +116,3 @@ To get started: Devcontainer specs: - [DevContainer config](.devcontainer/devcontainer.json) -- [Dockerfile](.devcontainer/Dockerfile) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 7fe6cd6d..34300c5d 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,7 +42,7 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 22.10.1)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 06728e6d..b4ab0269 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,18 +14,23 @@ jobs: runs-on: ubuntu-latest steps: - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/funcscan/work-${{ github.sha }} parameters: | { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/funcscan/results-${{ github.sha }}" } - profiles: test_full,aws_tower + profiles: test_full + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index b50ef8e8..cb05b166 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -12,18 +12,22 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/funcscan/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/funcscan/results-test-${{ github.sha }}" } - profiles: test,aws_tower + profiles: test + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ff568ad9..f45e42aa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: strategy: matrix: NXF_VER: - - "22.10.1" + - "23.04.0" - "latest-everything" parameters: - "--annotation_tool prodigal" @@ -52,7 +52,7 @@ jobs: strategy: matrix: NXF_VER: - - "22.10.1" + - "23.04.0" - "latest-everything" parameters: - "--annotation_tool prodigal" @@ -72,31 +72,31 @@ jobs: run: | nextflow run ${GITHUB_WORKSPACE} -profile test_bgc,docker --outdir ./results ${{ matrix.parameters }} - test_deeparg: - name: Run pipeline with test data (DeepARG only workflow) - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || 
(github.event_name == 'push' && github.repository == 'nf-core/funcscan') }}" - runs-on: ubuntu-latest - strategy: - matrix: - NXF_VER: - - "22.10.1" - - "latest-everything" - parameters: - - "--annotation_tool bakta --annotation_bakta_db_downloadtype light" - - "--annotation_tool prodigal" - - "--annotation_tool prokka" - - "--annotation_tool pyrodigal" + # test_deeparg: #Tests switched off as long as DeepARG servers are unavaiable for database download. + # name: Run pipeline with test data (DeepARG only workflow) + # # Only run on push if this is the nf-core dev branch (merged PRs) + # if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/funcscan') }}" + # runs-on: ubuntu-latest + # strategy: + # matrix: + # NXF_VER: + # - "23.04.0" + # - "latest-everything" + # parameters: + # - "--annotation_tool bakta --annotation_bakta_db_downloadtype light" + # - "--annotation_tool prodigal" + # - "--annotation_tool prokka" + # - "--annotation_tool pyrodigal" - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 + # steps: + # - name: Check out pipeline code + # uses: actions/checkout@v2 - - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 - with: - version: "${{ matrix.NXF_VER }}" + # - name: Install Nextflow + # uses: nf-core/setup-nextflow@v1 + # with: + # version: "${{ matrix.NXF_VER }}" - - name: Run pipeline with test data (DeepARG workflow) - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_deeparg,docker --outdir ./results ${{ matrix.parameters }} + # - name: Run pipeline with test data (DeepARG workflow) + # run: | + # nextflow run ${GITHUB_WORKSPACE} -profile test_deeparg,docker --outdir ./results ${{ matrix.parameters }} diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ecc..25488dcc 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,4 +1,9 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update vscode: extensions: # based on nf-core.nf-core-extensionpack diff --git a/CHANGELOG.md b/CHANGELOG.md index da127edf..7fa76fc2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,31 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v1.1.3 - [2023-08-11] + +### `Added` + +- [#290](https://github.com/nf-core/funcscan/pull/290) Merged pipeline template of nf-core/tools version 2.9, updated references. (by @jfy133) +- [#285](https://github.com/nf-core/funcscan/pull/285) Use nf-validation for samplesheet checking and added support for `fna.gz` input FASTA files. (by @louperelo, @mirpedrol, @jfy133) +- [#295](https://github.com/nf-core/funcscan/pull/295) Add Prokka to MultiQC output. (by @louperelo) + +### `Fixed` + +- [#296](https://github.com/nf-core/funcscan/pull/296) Fixed empty output when saving prodigal annotations. (reported by @louperelo, fix by @jasmezz) +- [#297](https://github.com/nf-core/funcscan/pull/297) Added check for empty annotation files prior going into screening. (❤️ to @alexhbnr for requesting, added by @jfy133) +- [#299](https://github.com/nf-core/funcscan/pull/299) Fixed pigz error with symlinks in Pyrodigal. (by @jasmezz) +- [#300](https://github.com/nf-core/funcscan/pull/300) Fixed wrong Pyrodigal channels being submitted to antiSMASH. 
(reported by Till Bayer, fix by @jasmezz) +- [#302](https://github.com/nf-core/funcscan/pull/302) Removed trouble-causing default parameters in json schema. (by @robsyme) + +### `Dependencies` + +| Tool | Previous version | New version | +| ------ | ---------------- | ----------- | +| comBGC | 0.6.0 | 0.6.1 | +| GECCO | 0.9.2 | 0.9.8 | + +### `Deprecated` + ## v1.1.2 - [2023-06-30] ### `Added` diff --git a/CITATIONS.md b/CITATIONS.md index 73e74b41..84ada6d1 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -20,7 +20,7 @@ - [AMPlify](https://doi.org/10.1186/s12864-022-08310-4) - > CLi, C., Sutherland, D., Hammond, S. A., Yang, C., Taho, F., Bergman, L., Houston, S., Warren, R. L., Wong, T., Hoang, L., Cameron, C. E., Helbing, C. C., & Birol, I. (2022). AMPlify: attentive deep learning model for discovery of novel antimicrobial peptides effective against WHO priority pathogens. BMC genomics, 23(1), 77. [DOI: 10.1186/s12864-022-08310-4](https://doi.org/10.1186/s12864-022-08310-4) + > Li, C., Sutherland, D., Hammond, S. A., Yang, C., Taho, F., Bergman, L., Houston, S., Warren, R. L., Wong, T., Hoang, L., Cameron, C. E., Helbing, C. C., & Birol, I. (2022). AMPlify: attentive deep learning model for discovery of novel antimicrobial peptides effective against WHO priority pathogens. BMC genomics, 23(1), 77. [DOI: 10.1186/s12864-022-08310-4](https://doi.org/10.1186/s12864-022-08310-4) - [AMRFinderPlus](https://doi.org/10.1038/s41598-021-91456-0) @@ -34,6 +34,14 @@ > Schwengers, O., Jelonek, L., Dieckmann, M. A., Beyvers, S., Blom, J., & Goesmann, A. (2021). Bakta: rapid and standardized annotation of bacterial genomes via alignment-free sequence identification. Microbial Genomics, 7(11). [DOI: 10.1099/mgen.0.000685](https://doi.org/10.1099/mgen.0.000685) +- [bioawk](https://github.com/lh3/bioawk) + + > Li, H. (2023). bioawk: BWK awk modified for biological data. Github. Retrieved July 12, 2023, from https://github.com/lh3/bioawk + +- [comBGC](https://github.com/nf-core/funcscan) + + > Frangenberg, J., Fellows Yates, J. A., Ibrahim, A., Perelo, L., & Beber, M. E. (2023). nf-core/funcscan: 1.0.0 - German Rollmops - 2023-02-15. https://doi.org/10.5281/zenodo.7643100 + - [DeepARG](https://doi.org/10.1186/s40168-018-0401-z) > Arango-Argoty, G., Garner, E., Pruden, A., Heath, L. S., Vikesland, P., & Zhang, L. (2018). DeepARG: a deep learning approach for predicting antibiotic resistance genes from metagenomic data. Microbiome, 6(1), 23. [DOI: 10.1186/s40168-018-0401-z](https://doi.org/10.1186/s40168-018-0401-z) @@ -48,16 +56,16 @@ - [GECCO](https://gecco.embl.de) - > Carroll, L. M. , Larralde, M., Fleck, J. S., Ponnudurai, R., Milanese, A., Cappio Barazzone, E. & Zeller, G. (2021). Accurate de novo identification of biosynthetic gene clusters with GECCO. bioRxiv [DOI: 10.1101/2021.05.03.442509](https://doi.org/10.1101/2021.05.03.442509) - -- [hAMRonization](https://github.com/pha4ge/hAMRonization) - - > Public Health Alliance for Genomic Epidemiology (pha4ge). (2022). Parse multiple Antimicrobial Resistance Analysis Reports into a common data structure. Github. Retrieved October 5, 2022, from [https://github.com/pha4ge/hAMRonization](https://github.com/pha4ge/hAMRonization) + > Carroll, L. M. , Larralde, M., Fleck, J. S., Ponnudurai, R., Milanese, A., Cappio Barazzone, E. & Zeller, G. (2021). Accurate de novo identification of biosynthetic gene clusters with GECCO. bioRxiv. 
[DOI: 10.1101/2021.05.03.442509](https://doi.org/10.1101/2021.05.03.442509) - [AMPcombi](https://github.com/Darcy220606/AMPcombi) > Ibrahim, A. & Perelo, L. (2023). Darcy220606/AMPcombi. [DOI: 10.5281/zenodo.7639121](https://doi.org/10.5281/zenodo.7639121). +- [hAMRonization](https://github.com/pha4ge/hAMRonization) + + > Maguire, F., Fornika, D., Mendes, I., Phelan, J., Underwood, A., Witney, A., pvanheus, Manuele, A., Lee, T., amos, & imendes. (2023). pha4ge/hAMRonization: Zenodo Release. Zenodo. https://doi.org/10.5281/ZENODO.8131134 + - [HMMER](https://doi.org/10.1371/journal.pcbi.1002195.) > Eddy S. R. (2011). Accelerated Profile HMM Searches. PLoS computational biology, 7(10), e1002195. [DOI: 10.1371/journal.pcbi.1002195](https://doi.org/10.1371/journal.pcbi.1002195) @@ -98,5 +106,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) - > Kurtzer, G. M., Sochat, V., & Bauer, M. W. (2017). Singularity: Scientific containers for mobility of compute. PloS one, 12(5), e0177459. [DOI: 10.1371/journal.pone.0177459](https://doi.org/10.1371/journal.pone.0177459) + + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/README.md b/README.md index 75eb5ec5..76eecfb7 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ # ![nf-core/funscan](docs/images/nf-core-funcscan_logo_flat_light.png#gh-light-mode-only) ![nf-core/funscan](docs/images/nf-core-funcscan_logo_flat_dark.png#gh-dark-mode-only) [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/funcscan/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7643099-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7643099) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) + [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) @@ -67,11 +68,11 @@ nextflow run nf-core/funcscan \ > provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; > see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -For more details, please refer to the [usage documentation](https://nf-co.re/funcscan/usage) and the [parameter documentation](https://nf-co.re/funcscan/parameters). +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/funcscan/usage) and the [parameter documentation](https://nf-co.re/funcscan/parameters). ## Pipeline output -To see the the results of a test run with a full size dataset refer to the [results](https://nf-co.re/funcscan/results) tab on the nf-core website pipeline page. 
+To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/funcscan/results) tab on the nf-core website pipeline page. For more details about the output files and reports, please refer to the [output documentation](https://nf-co.re/funcscan/output). diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 450fdf91..2cc2aa50 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,17 +3,21 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/funcscan Methods Description" section_href: "https://github.com/nf-core/funcscan" plot_type: "html" -## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline ## You inject any metadata in the Nextflow '${workflow}' object data: |

   <h4>Methods</h4>
-  <p>Data was processed using nf-core/funcscan v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).</p>
+  <p>Data was processed using nf-core/funcscan v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.</p>
   <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:</p>
   <pre><code>${workflow.commandLine}</code></pre>
+  <p>${tool_citations}</p>
   <h4>References</h4>
   <h4>Notes:</h4>
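For context on the input change in this release: the `assets/schema_input.json` hunk further down replaces the paired-FASTQ columns with a single required, gzipped FASTA column (`.fa.gz`, `.fna.gz` or `.fasta.gz`). A samplesheet that would satisfy the new schema might look like the sketch below; the row reuses the example URL from the removed `bin/check_samplesheet.py` docstring, and the sample name is illustrative:

```csv
sample,fasta
contig_1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/bacteroides_fragilis/genome/genome.fna.gz
```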
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 356df32d..25990a0d 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/funcscan + This report has been generated by the nf-core/funcscan analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-funcscan-methods-description": order: -1000 @@ -15,3 +15,6 @@ export_plots: true custom_logo: "nf-core-funcscan_logo_flat_light.png" custom_logo_url: https://nf-co.re/funcscan custom_logo_title: "nf-core/funcscan" + +## Tool specific configuration +prokka_fn_snames: True diff --git a/assets/nf-core-funcscan_logo_light.png b/assets/nf-core-funcscan_logo_light.png index 2971be16..f00d53e8 100644 Binary files a/assets/nf-core-funcscan_logo_light.png and b/assets/nf-core-funcscan_logo_light.png differ diff --git a/assets/schema_input.json b/assets/schema_input.json index ebaffc21..4c70c654 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,27 +10,19 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"], + "unique": true }, - "fastq_1": { + "fasta": { "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" - }, - "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast|n)?a\\.gz$", + "errorMessage": "Fasta file for reads must be provided, cannot contain spaces and must have extension '.fa.gz', '.fna.gz' or '.fasta.gz'", + "unique": true } }, - "required": ["sample", "fastq_1"] + "required": ["sample", "fasta"] } } diff --git a/assets/slackreport.json b/assets/slackreport.json index 043d02f2..501716d6 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "sanger-tol/readmapping v${version} - ${runName}", + "author_name": "nf-core/funcscan v${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index 16c8279a..00000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,215 +0,0 @@ -#!/usr/bin/env python - - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import Counter -from pathlib import Path - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. 
- - """ - - VALID_FORMATS = ( - ".fa", - ".fna", - ".fasta", - ".fa.gz", - ".fna.gz", - ".fasta.gz", - ) - - def __init__( - self, - sample_col="sample", - contig_col="fasta", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - sample_col (str): The name of the column that contains a contig's - identifier (default "sample"). - contig_col (str): The name of the column that contains the contig's - FASTA file path (default "fastqa"). - - """ - super().__init__(**kwargs) - self._sample_col = sample_col - self._contig_col = contig_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_sample(row) - self._validate_fasta(row) - self._validate_fasta_format(row) - self._seen.add((row[self._sample_col], row[self._contig_col])) - self.modified.append(row) - - def _validate_sample(self, row): - """Assert that the sample name exists and convert spaces to underscores.""" - if len(row[self._sample_col]) <= 0: - raise AssertionError("Sample input is required.") - # Sanitize samples slightly. - row[self._sample_col] = row[self._sample_col].replace(" ", "_") - - def _validate_fasta(self, row): - """Assert that the FASTA entry is non-empty and has the right format.""" - assert len(row[self._contig_col]) > 0, "The FASTA file is required." - assert ( - " " not in Path(row[self._contig_col]).name - ), f"The FASTA filename may not contain any spaces '{row[self._contig_col]}'." - - def _validate_fasta_format(self, row): - """Assert that a given filename has one of the expected FASTQ extensions.""" - filename = Path(row[self._contig_col]).name - assert any(filename.endswith(extension) for extension in self.VALID_FORMATS), ( - f"The FASTA file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file` object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. _text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by nf-core pipelines. - - Validate the general shape of the table, expected columns, and each row. Also add - an additional column which records whether one or two FASTQ reads were found. - - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. 
- - Example: - This function checks that the samplesheet follows the following structure:: - - sample,fasta - contig_1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/bacteroides_fragilis/genome/genome.fna.gz - - """ - required_columns = {"sample", "fasta"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. - checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - header = list(reader.fieldnames) - header.insert(1, "single_end") - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/bin/comBGC.py b/bin/comBGC.py index 53cd64a6..81f1eb3b 100755 --- a/bin/comBGC.py +++ b/bin/comBGC.py @@ -32,7 +32,7 @@ SOFTWARE. """ -tool_version = "0.6.0" +tool_version = "0.6.1" welcome = """\ ........................ 
* comBGC v.{version} * @@ -463,7 +463,7 @@ def gecco_workflow(gecco_paths): # GECCO output columns that can be mapped (comBGC:GECCO) map_dict = { "sequence_id": "Contig_ID", - "bgc_id": "bgc_id", + "bgc_id": "cluster_id", "type": "Product_class", "average_p": "BGC_probability", "start": "BGC_start", @@ -524,7 +524,7 @@ def gecco_workflow(gecco_paths): # Add column 'InterPro_ID' for gbk_path in gbk_paths: bgc_id = gbk_path.split("/")[-1][0:-4] - gecco_df.loc[gecco_df["bgc_id"] == bgc_id, "InterPro_ID"] = getInterProID(gbk_path) + gecco_df.loc[gecco_df["cluster_id"] == bgc_id, "InterPro_ID"] = getInterProID(gbk_path) # Add empty columns with no output from GECCO gecco_df["BGC_complete"] = "NA" diff --git a/conf/modules.config b/conf/modules.config index 02f2ea53..f5472ced 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,14 +18,6 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: SAMPLESHEET_CHECK { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, @@ -57,7 +49,7 @@ process { enabled: false, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - ext.prefix = { "${meta.it}.fa" } + ext.prefix = { "${meta.id}.fa" } } withName: PROKKA { @@ -130,7 +122,7 @@ process { path: { "${params.outdir}/annotation/prodigal/${meta.id}" }, mode: params.publish_dir_mode, enabled: params.save_annotations, - pattern: "*.{faa,fna,gff}", + pattern: "*.{faa,fna,gff}.gz", saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] ext.args = [ @@ -146,7 +138,7 @@ process { path: { "${params.outdir}/annotation/prodigal/${meta.id}" }, mode: params.publish_dir_mode, enabled: params.save_annotations, - pattern: "*.gbk", + pattern: "*.gbk.gz", saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] ext.args = [ diff --git a/conf/test_full.config b/conf/test_full.config index 9d64e003..e8622529 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -10,8 +10,6 @@ ---------------------------------------------------------------------------------------- */ -cleanup = true - params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' diff --git a/conf/test_nothing.config b/conf/test_nothing.config new file mode 100644 index 00000000..f5df5b3b --- /dev/null +++ b/conf/test_nothing.config @@ -0,0 +1,33 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+
+    Although in this case we turn everything off
+
+    Use as follows:
+        nextflow run nf-core/funcscan -profile test,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name        = 'Test nothing profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus   = 2
+    max_memory = '6.GB'
+    max_time   = '6.h'
+
+    // Input data
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/samplesheet.csv'
+    amp_hmmsearch_models = 'https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/hmms/mybacteriocin.hmm'
+
+    annotation_tool = 'prodigal'
+
+    run_arg_screening = false
+    run_amp_screening = false
+    run_bgc_screening = false
+}
diff --git a/docs/usage.md b/docs/usage.md
index 39822a51..cf9ba24f 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -249,7 +249,8 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than
 Pipeline settings can be provided in a `yaml` or `json` file via `-params-file <file>`.
 
 > ⚠️ Do not use `-c <file>` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args).
-> The above pipeline run specified with a params file in yaml format:
+
+The above pipeline run specified with a params file in yaml format:
 
 ```bash
 nextflow run nf-core/funcscan -profile docker -params-file params.yaml
 ```
@@ -261,7 +262,6 @@ with `params.yaml` containing:
 ```yaml
 input: './samplesheet.csv'
 outdir: './results/'
 genome: 'GRCh37'
-input: 'data'
 <...>
 ```
diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy
deleted file mode 100755
index 9b34804d..00000000
--- a/lib/NfcoreSchema.groovy
+++ /dev/null
@@ -1,530 +0,0 @@
-//
-// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template.
-// - -import nextflow.Nextflow -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-apptainer', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - Nextflow.error('Exiting!') - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def 
defaultValue = group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && 
params_value != "" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... 
- // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 25a0a74a..408951ae 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -128,7 +128,7 @@ class NfcoreTemplate { def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] def sf = new File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) diff --git a/lib/WorkflowFuncscan.groovy b/lib/WorkflowFuncscan.groovy index c70f4940..ba134ea7 100755 --- a/lib/WorkflowFuncscan.groovy +++ b/lib/WorkflowFuncscan.groovy @@ -11,6 +11,7 @@ class WorkflowFuncscan { // Check and validate parameters // public static void initialise(params, log) { + genomeExistsError(params, log) //if (!params.fasta) { @@ -45,15 +46,134 @@ class WorkflowFuncscan { return yaml_file_text } - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // + // Generate methods description for MultiQC + // + + public static String toolCitationText(params) { + + def preprocessing_text = "The pipeline used the following tools: preprocessing included bioawk (Li 2023)." + + def annotation_text = [ + "Annotation was carried out with:", + params.annotation_tool == 'prodigal' ? "Prodigal (Hyatt et al. 2010)." : "", + params.annotation_tool == 'pyrodigal' ? 
"Pyrodigal (Larralde 2022)." : "", + params.annotation_tool == 'bakta' ? "BAKTA (Schwengers et al. 2021)." : "", + params.annotation_tool == 'prokka' ? "PROKKA (Seemann 2014)." : "", + ].join(' ').trim() + + def amp_text = [ + "The following antimicrobial peptide screening tools were used:", + !params.amp_skip_amplify ? "AMPlify (Li et al. 2022)," : "", + !params.amp_skip_macrel ? "Macrel (Santos-Júnior et al. 2020)," : "", + !params.amp_skip_ampir ? "ampir (Fingerhut et al. 2021)," : "", + !params.amp_skip_hmmsearch ? "HMMER (Eddy 2011)," : "", + ". The output from the antimicrobial peptide screening tools were standardised and summarised with AMPcombi (Ibrahim and Perelo 2023)." + ].join(' ').trim().replaceAll(", \\.", ".") + + def arg_text = [ + "The following antimicrobial resistance gene screening tools were used:", + !params.arg_skip_fargene ? "fARGene (Berglund et al. 2019)," : "", + !params.arg_skip_rgi ? "RGI (Alcock et al. 2020)," : "", + !params.arg_skip_amrfinderplus ? "AMRfinderplus (Feldgarden et al. 2021)," : "", + !params.arg_skip_deeparg ? "deepARG (Arango-Argoty 2018)," : "", + !params.arg_skip_abricate ? "ABRicate (Seemann 2020)," : "", + ". The output from the antimicrobial resistance gene screening tools were standardised and summarised with hAMRonization (Maguire et al. 2023)." + ].join(' ').trim().replaceAll(", +\\.", ".") + + def bgc_text = [ + "The following biosynthetic gene cluster screening tools were used:", + !params.bgc_skip_antismash ? "antiSMASH (Blin et al. 2021)," : "", + !params.bgc_skip_deepbgc ? "deepBGC (Hannigan et al. 2019)," : "", + !params.bgc_skip_gecco ? "GECCO (Carroll et al. 2021)," : "", + !params.bgc_skip_hmmsearch ? "HMMER (Eddy 2011)," : "", + ". The output from the biosynthetic gene cluster screening tools were standardised and summarised with comBGC (Frangenberg et al. 2023)." + ].join(' ').replaceAll(", +\\.", ".").trim() + + def postprocessing_text = "Run statistics were reported using MultiQC (Ewels et al. 2016)." + + def citation_text = [ + preprocessing_text, + annotation_text, + params.run_amp_screening ? amp_text : "", + params.run_arg_screening ? arg_text : "", + params.run_bgc_screening ? bgc_text : "", + postprocessing_text, + ].join(' ').trim() + + return citation_text + } + + public static String toolBibliographyText(params) { + + def preprocessing_text = "
  • Li, H. (2023). bioawk: BWK awk modified for biological data. Github. Retrieved July 12, 2023, from https://github.com/lh3/bioawk
  • " + + def annotation_text = [ + params.annotation_tool == 'prodigal' ? "
  • Hyatt, D., Chen, G. L., Locascio, P. F., Land, M. L., Larimer, F. W., & Hauser, L. J. (2010). Prodigal: prokaryotic gene recognition and translation initiation site identification. BMC bioinformatics, 11, 119. DOI: 10.1186/1471-2105-11-119" : "", + params.annotation_tool == 'pyrodigal' ? "
  • Larralde, M. (2022). Pyrodigal: Python bindings and interface to Prodigal, an efficient method for gene prediction in prokaryotes. Journal of Open Source Software, 7(72), 4296. DOI: 10.21105/joss.04296
  • " : "", + params.annotation_tool == 'bakta' ? "
  • Schwengers, O., Jelonek, L., Dieckmann, M. A., Beyvers, S., Blom, J., & Goesmann, A. (2021). Bakta: rapid and standardized annotation of bacterial genomes via alignment-free sequence identification. Microbial Genomics, 7(11). DOI: 10.1099/mgen.0.000685
  • " : "", + params.annotation_tool == 'prokka' ? "
  • Seemann, T. (2014). Prokka: rapid prokaryotic genome annotation. Bioinformatics (Oxford, England), 30(14), 2068–2069. DOI: 10.1093/bioinformatics/btu153
  • " : "", + ].join(' ').trim() + + def amp_text = [ + !params.amp_skip_amplify ? "
  • Li, C., Sutherland, D., Hammond, S. A., Yang, C., Taho, F., Bergman, L., Houston, S., Warren, R. L., Wong, T., Hoang, L., Cameron, C. E., Helbing, C. C., & Birol, I. (2022). AMPlify: attentive deep learning model for discovery of novel antimicrobial peptides effective against WHO priority pathogens. BMC genomics, 23(1), 77. DOI: 10.1186/s12864-022-08310-4
  • " : "", + !params.amp_skip_macrel ? "
  • Santos-Júnior, C. D., Pan, S., Zhao, X. M., & Coelho, L. P. (2020). Macrel: antimicrobial peptide screening in genomes and metagenomes. PeerJ, 8, e10555. DOI: 10.7717/peerj.10555
  • " : "", + !params.amp_skip_ampir ? "
  • Fingerhut, L., Miller, D. J., Strugnell, J. M., Daly, N. L., & Cooke, I. R. (2021). ampir: an R package for fast genome-wide prediction of antimicrobial peptides. Bioinformatics (Oxford, England), 36(21), 5262–5263. DOI: 10.1093/bioinformatics/btaa653
  • " : "", + "
  • Ibrahim, A. & Perelo, L. (2023). Darcy220606/AMPcombi. DOI: 10.5281/zenodo.7639121
  • " + ].join(' ').trim().replaceAll(", \\.", ".") + + def arg_text = [ + !params.arg_skip_fargene ? "
<li>Berglund, F., Österlund, T., Boulund, F., Marathe, N. P., Larsson, D., & Kristiansson, E. (2019). Identification and reconstruction of novel antibiotic resistance genes from metagenomes. Microbiome, 7(1), 52. DOI: 10.1186/s40168-019-0670-1</li>" : "",
+            !params.arg_skip_rgi ? "<li>Alcock, B. P., Raphenya, A. R., Lau, T., Tsang, K. K., Bouchard, M., Edalatmand, A., Huynh, W., Nguyen, A. V., Cheng, A. A., Liu, S., Min, S. Y., Miroshnichenko, A., Tran, H. K., Werfalli, R. E., Nasir, J. A., Oloni, M., Speicher, D. J., Florescu, A., Singh, B., Faltyn, M., … McArthur, A. G. (2020). CARD 2020: antibiotic resistome surveillance with the comprehensive antibiotic resistance database. Nucleic acids research, 48(D1), D517–D525. DOI: 10.1093/nar/gkz935</li>" : "",
+            !params.arg_skip_amrfinderplus ? "<li>Feldgarden, M., Brover, V., Gonzalez-Escalona, N., Frye, J. G., Haendiges, J., Haft, D. H., Hoffmann, M., Pettengill, J. B., Prasad, A. B., Tillman, G. E., Tyson, G. H., & Klimke, W. (2021). AMRFinderPlus and the Reference Gene Catalog facilitate examination of the genomic links among antimicrobial resistance, stress response, and virulence. Scientific reports, 11(1), 12728. DOI: 10.1038/s41598-021-91456-0</li>" : "",
+            !params.arg_skip_deeparg ? "<li>Arango-Argoty, G., Garner, E., Pruden, A., Heath, L. S., Vikesland, P., & Zhang, L. (2018). DeepARG: a deep learning approach for predicting antibiotic resistance genes from metagenomic data. Microbiome, 6(1), 23. DOI: 10.1186/s40168-018-0401-z</li>" : "",
+            !params.arg_skip_abricate ? "<li>Seemann, T. (2020). ABRicate. Github. https://github.com/tseemann/abricate</li>" : "",
+            "<li>Public Health Alliance for Genomic Epidemiology (pha4ge). (2022). Parse multiple Antimicrobial Resistance Analysis Reports into a common data structure. Github. Retrieved October 5, 2022, from https://github.com/pha4ge/hAMRonization</li>"
+        ].join(' ').trim().replaceAll(", +\\.", ".")
+
+        def bgc_text = [
+            !params.bgc_skip_antismash ? "<li>Blin, K., Shaw, S., Kloosterman, A. M., Charlop-Powers, Z., van Wezel, G. P., Medema, M. H., & Weber, T. (2021). antiSMASH 6.0: improving cluster detection and comparison capabilities. Nucleic acids research, 49(W1), W29–W35. DOI: 10.1093/nar/gkab335</li>" : "",
+            !params.bgc_skip_deepbgc ? "<li>Hannigan, G. D., Prihoda, D., Palicka, A., Soukup, J., Klempir, O., Rampula, L., Durcak, J., Wurst, M., Kotowski, J., Chang, D., Wang, R., Piizzi, G., Temesi, G., Hazuda, D. J., Woelk, C. H., & Bitton, D. A. (2019). A deep learning genome-mining strategy for biosynthetic gene cluster prediction. Nucleic acids research, 47(18), e110. DOI: 10.1093/nar/gkz654</li>" : "",
+            !params.bgc_skip_gecco ? "<li>Carroll, L. M., Larralde, M., Fleck, J. S., Ponnudurai, R., Milanese, A., Cappio Barazzone, E., & Zeller, G. (2021). Accurate de novo identification of biosynthetic gene clusters with GECCO. bioRxiv. DOI: 10.1101/2021.05.03.442509</li>" : "",
+            "<li>Frangenberg, J., Fellows Yates, J. A., Ibrahim, A., Perelo, L., & Beber, M. E. (2023). nf-core/funcscan: 1.0.0 - German Rollmops - 2023-02-15. https://doi.org/10.5281/zenodo.7643100</li>"
+        ].join(' ').replaceAll(", +\\.", ".").trim()
+
+        def postprocessing_text = "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. https://doi.org/10.1093/bioinformatics/btw354</li>"
+
+        // Special as reused in multiple subworkflows, and we don't want to cause duplicates
+        def hmmsearch_text = ( params.run_amp_screening && !params.amp_skip_hmmsearch ) || (params.run_bgc_screening && !params.bgc_skip_hmmsearch) ? "<li>Eddy S. R. (2011). Accelerated Profile HMM Searches. PLoS computational biology, 7(10), e1002195. DOI: 10.1371/journal.pcbi.1002195</li>" : ""
+
+        def reference_text = [
+            preprocessing_text,
+            annotation_text,
+            params.run_amp_screening ? amp_text : "",
+            params.run_arg_screening ? arg_text : "",
+            params.run_bgc_screening ? bgc_text : "",
+            hmmsearch_text,
+            postprocessing_text,
+        ].join(' ').trim()
+
+        return reference_text
+    }
+
-    public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) {
+    public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) {
         // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file
         def meta = [:]
         meta.workflow = run_workflow.toMap()
         meta["manifest_map"] = run_workflow.manifest.toMap()

+        // Pipeline DOI
         meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : ""
         meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>"
+
+        // Tool references
+        meta["tool_citations"] = ""
+        meta["tool_bibliography"] = ""
+
+        meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".")
+        meta["tool_bibliography"] = toolBibliographyText(params)
+
         def methods_text = mqc_methods_yaml.text

         def engine = new SimpleTemplateEngine()
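The conditional bibliography assembly above repeats one pattern throughout: a ternary per tool, then a join and a tidy-up of stray punctuation. An illustrative Groovy snippet of how that assembly behaves (the map and strings here are stand-ins, not pipeline code):

    // Illustrative only: each skipped tool contributes an empty string,
    // which join(' ') plus trim() then collapses away.
    def opts = [arg_skip_rgi: true, arg_skip_abricate: false]
    def text = [
        !opts.arg_skip_rgi      ? "<li>CARD 2020 ...</li>" : "",
        !opts.arg_skip_abricate ? "<li>ABRicate ...</li>"  : "",
    ].join(' ').trim()
    assert text == "<li>ABRicate ...</li>"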
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
old mode 100755
new mode 100644
index 8966df00..1b9b0766
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -19,40 +19,10 @@ class WorkflowMain {
             " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
     }

-    //
-    // Generate help string
-    //
-    public static String help(workflow, params) {
-        def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker"
-        def help_string = ''
-        help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs)
-        help_string += NfcoreSchema.paramsHelp(workflow, params, command)
-        help_string += '\n' + citation(workflow) + '\n'
-        help_string += NfcoreTemplate.dashedLine(params.monochrome_logs)
-        return help_string
-    }
-
-    //
-    // Generate parameter summary log string
-    //
-    public static String paramsSummaryLog(workflow, params) {
-        def summary_log = ''
-        summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs)
-        summary_log += NfcoreSchema.paramsSummaryLog(workflow, params)
-        summary_log += '\n' + citation(workflow) + '\n'
-        summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs)
-        return summary_log
-    }
-
     //
     // Validate parameters and print summary to screen
     //
     public static void initialise(workflow, params, log) {
-        // Print help to screen if required
-        if (params.help) {
-            log.info help(workflow, params)
-            System.exit(0)
-        }

         // Print workflow version and exit on --version
         if (params.version) {
@@ -61,14 +31,6 @@ class WorkflowMain {
             System.exit(0)
         }

-        // Print parameter summary log to screen
-        log.info paramsSummaryLog(workflow, params)
-
-        // Validate workflow parameters via the JSON schema
-        if (params.validate_params) {
-            NfcoreSchema.validateParameters(workflow, params, log)
-        }
-
         // Check that a -profile or Nextflow config has been provided to run the pipeline
         NfcoreTemplate.checkConfigProvided(workflow, log)
diff --git a/main.nf b/main.nf
index 5251114f..5d87fca2 100644
--- a/main.nf
+++ b/main.nf
@@ -25,6 +25,22 @@ params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta')
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

+include { validateParameters; paramsHelp } from 'plugin/nf-validation'
+
+// Print help message if needed
+if (params.help) {
+    def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
+    def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
+    def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker"
+    log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs)
+    System.exit(0)
+}
+
+// Validate input parameters
+if (params.validate_params) {
+    validateParameters()
+}
+
 WorkflowMain.initialise(workflow, params, log)

 /*
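Together, the two hunks above move help rendering and schema checking from the removed NfcoreSchema helpers to the nf-validation plugin. A minimal standalone sketch of the same pattern (hypothetical pipeline; assumes nf-validation is declared under `plugins` in `nextflow.config` and a `nextflow_schema.json` sits next to the script, and `my/pipeline` is a placeholder):

    // Minimal sketch of the nf-validation pattern added in main.nf above.
    include { validateParameters; paramsHelp } from 'plugin/nf-validation'

    params.help            = false
    params.validate_params = true

    if (params.help) {
        // paramsHelp() renders the --help text from nextflow_schema.json
        // around the given example command string
        log.info paramsHelp("nextflow run my/pipeline --input samplesheet.csv -profile docker")
        System.exit(0)
    }

    if (params.validate_params) {
        validateParameters() // aborts the run on schema violations
    }

    workflow { }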
diff --git a/modules.json b/modules.json
index 48f89af0..2d47f5a7 100644
--- a/modules.json
+++ b/modules.json
@@ -93,17 +93,17 @@
         },
         "fastqc": {
             "branch": "master",
-            "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
+            "git_sha": "bd8092b67b5103bdd52e300f75889442275c3117",
             "installed_by": ["modules"]
         },
         "gecco/run": {
             "branch": "master",
-            "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
+            "git_sha": "8c029dd8e67754d937fb6b6814e568f1decb2fea",
             "installed_by": ["modules"]
         },
         "gunzip": {
             "branch": "master",
-            "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e",
+            "git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57",
             "installed_by": ["modules"]
         },
         "hamronization/abricate": {
@@ -163,7 +163,7 @@
         },
         "pyrodigal": {
             "branch": "master",
-            "git_sha": "1b91efd7ff7b2fb5fe0d78f0d2f6c728afc5e552",
+            "git_sha": "dd3ed02ddb21363b1892e4705c164aa4cf945435",
             "installed_by": ["modules"]
         },
         "rgi/main": {
@@ -178,7 +178,7 @@
         },
         "untar": {
             "branch": "master",
-            "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e",
+            "git_sha": "d0b4fc03af52a1cc8c6fb4493b921b57352b1dd8",
             "installed_by": ["modules"]
         }
     }
diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf
deleted file mode 100644
index ca9ba3f5..00000000
--- a/modules/local/samplesheet_check.nf
+++ /dev/null
@@ -1,31 +0,0 @@
-process SAMPLESHEET_CHECK {
-    tag "$samplesheet"
-    label 'process_single'
-
-    conda "conda-forge::python=3.8.3"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/python:3.8.3' :
-        'biocontainers/python:3.8.3' }"
-
-    input:
-    path samplesheet
-
-    output:
-    path '*.csv'       , emit: csv
-    path "versions.yml", emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script: // This script is bundled with the pipeline, in nf-core/funcscan/bin/
-    """
-    check_samplesheet.py \\
-        $samplesheet \\
-        samplesheet.valid.csv
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        python: \$(python --version | sed 's/Python //g')
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
index 07d5e433..249f9064 100644
--- a/modules/nf-core/fastqc/main.nf
+++ b/modules/nf-core/fastqc/main.nf
@@ -29,7 +29,11 @@ process FASTQC {
    printf "%s %s\\n" $rename_to | while read old_name new_name; do
        [ -f "\${new_name}" ] || ln -s \$old_name \$new_name
    done
-    fastqc $args --threads $task.cpus $renamed_files
+
+    fastqc \\
+        $args \\
+        --threads $task.cpus \\
+        $renamed_files

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
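The deleted SAMPLESHEET_CHECK process (and the bundled check_samplesheet.py) is superseded by nf-validation's fromSamplesheet, which validates each row against assets/schema_input.json and builds the input channel directly. A minimal sketch, assuming the `input` parameter entry in `nextflow_schema.json` references that schema:

    // Minimal sketch: assumes params.input is set on the command line and the
    // schema's "input" entry points at assets/schema_input.json (sample,fasta columns).
    include { fromSamplesheet } from 'plugin/nf-validation'

    workflow {
        // each validated CSV row becomes e.g. [ [id:'sample_1'], sample_1.fasta ]
        ch_input = Channel.fromSamplesheet("input")
        ch_input.view()
    }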
diff --git a/modules/nf-core/gecco/run/main.nf b/modules/nf-core/gecco/run/main.nf
index b0a76a71..711fb965 100644
--- a/modules/nf-core/gecco/run/main.nf
+++ b/modules/nf-core/gecco/run/main.nf
@@ -2,10 +2,10 @@ process GECCO_RUN {
     tag "$meta.id"
     label 'process_low'

-    conda "bioconda::gecco=0.9.2"
+    conda "bioconda::gecco=0.9.8"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/gecco:0.9.2--pyhdfd78af_0':
-        'biocontainers/gecco:0.9.2--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/gecco:0.9.8--pyhdfd78af_0':
+        'biocontainers/gecco:0.9.8--pyhdfd78af_0' }"

     input:
     tuple val(meta), path(input), path(hmm)
diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf
index e7189d2f..73bf08cd 100644
--- a/modules/nf-core/gunzip/main.nf
+++ b/modules/nf-core/gunzip/main.nf
@@ -21,10 +21,14 @@ process GUNZIP {
     def args = task.ext.args ?: ''
     gunzip = archive.toString() - '.gz'
     """
-    gunzip \\
-        -f \\
+    # Not calling gunzip itself because it creates files
+    # with the original group ownership rather than the
+    # default one for that user / the work directory
+    gzip \\
+        -cd \\
         $args \\
-        $archive
+        $archive \\
+        > $gunzip

     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/nf-core/pyrodigal/main.nf b/modules/nf-core/pyrodigal/main.nf
index c6429b9d..aa8e9e48 100644
--- a/modules/nf-core/pyrodigal/main.nf
+++ b/modules/nf-core/pyrodigal/main.nf
@@ -34,7 +34,7 @@ process PYRODIGAL {
        -a ${prefix}.faa \\
        -s ${prefix}.score

-    pigz -nm ${prefix}*
+    pigz -nmf ${prefix}*

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf
index 8cd1856c..61461c39 100644
--- a/modules/nf-core/untar/main.nf
+++ b/modules/nf-core/untar/main.nf
@@ -2,7 +2,7 @@ process UNTAR {
     tag "$archive"
     label 'process_single'

-    conda "conda-forge::sed=4.7 bioconda::grep=3.4 conda-forge::tar=1.34"
+    conda "conda-forge::sed=4.7 conda-forge::grep=3.11 conda-forge::tar=1.34"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
         'nf-core/ubuntu:20.04' }"
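The GUNZIP change above is worth a note: streaming with `gzip -cd` lets the shell redirect create the output file, so it takes the work directory's default ownership instead of inheriting the archive's group, as the added comment explains. A cut-down sketch of the same pattern in a hypothetical process (not the nf-core module itself):

    // Sketch of a hypothetical process using the same streaming decompression;
    // the redirect (not gzip) creates the file, so it gets the default
    // ownership of the task work directory rather than the archive's group.
    process DECOMPRESS {
        input:
        path archive

        output:
        path "$unzipped"

        script:
        unzipped = archive.toString() - '.gz'
        """
        gzip -cd $archive > $unzipped
        """
    }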
diff --git a/nextflow.config b/nextflow.config
index a9e49e85..95ffa0c1 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -12,10 +12,11 @@ params {
     // Input options
     input                      = null

-    // References NOT USED IN FUNCSCAN, KEPT FOR TEMPLATE MERGE PURPOSES
+    // References - Not used in funcscan, left for template purposes
     genome                     = null
     igenomes_base              = 's3://ngi-igenomes/igenomes'
-    igenomes_ignore            = true
+    igenomes_ignore            = false
+
     // Annotation options
     annotation_tool            = 'pyrodigal'
@@ -184,7 +185,6 @@ params {

     // Boilerplate options
     outdir                     = null
-    tracedir                   = "${params.outdir}/pipeline_info"
     publish_dir_mode           = 'copy'
     email                      = null
     email_on_fail              = null
@@ -193,17 +193,14 @@ params {
     hook_url                   = null
     help                       = false
     version                    = false
-    validate_params            = true
-    show_hidden_params         = false
-    schema_ignore_params       = 'genomes,fasta,igenomes_base'

     // Config options
+    config_profile_name        = null
+    config_profile_description = null
     custom_config_version      = 'master'
     custom_config_base         = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
-    config_profile_description = null
     config_profile_contact     = null
     config_profile_url         = null
-    config_profile_name        = null

     // Max resource options
     // Defaults only, expecting to be overwritten
@@ -211,6 +208,13 @@ params {
     max_cpus                   = 16
     max_time                   = '240.h'

+    // Schema validation default options
+    validationFailUnrecognisedParams = false
+    validationLenientMode            = false
+    validationSchemaIgnoreParams     = 'genomes'
+    validationShowHiddenParams       = false
+    validate_params                  = true
+
 }

 // Load base.config by default for all pipelines
@@ -235,7 +239,7 @@ profiles {
     debug {
         dumpHashes             = true
         process.beforeScript   = 'echo $HOSTNAME'
-        cleanup = false
+        cleanup                = false
     }
     conda {
         conda.enabled          = true
@@ -325,6 +329,20 @@ profiles {
     test_bgc      { includeConfig 'conf/test_bgc.config'     }
     test_full     { includeConfig 'conf/test_full.config'    }
     test_deeparg  { includeConfig 'conf/test_deeparg.config' }
+    test_nothing  { includeConfig 'conf/test_nothing.config' }
+}
+
+// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
+// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled
+// Set to your registry if you have a mirror of containers
+apptainer.registry   = 'quay.io'
+docker.registry      = 'quay.io'
+podman.registry      = 'quay.io'
+singularity.registry = 'quay.io'
+
+// Nextflow plugins
+plugins {
+    id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet
 }

 // Load igenomes.config if required
@@ -348,28 +366,22 @@ env {

 // Capture exit codes from upstream processes when piping
 process.shell = ['/bin/bash', '-euo', 'pipefail']

-// Set default registry for Docker and Podman independent of -profile
-// Will not be used unless Docker / Podman are enabled
-// Set to your registry if you have a mirror of containers
-docker.registry = 'quay.io'
-podman.registry = 'quay.io'
-
 def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
 timeline {
     enabled = true
-    file    = "${params.tracedir}/execution_timeline_${trace_timestamp}.html"
+    file    = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html"
 }
 report {
     enabled = true
-    file    = "${params.tracedir}/execution_report_${trace_timestamp}.html"
+    file    = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html"
 }
 trace {
     enabled = true
-    file    = "${params.tracedir}/execution_trace_${trace_timestamp}.txt"
+    file    = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt"
 }
 dag {
     enabled = true
-    file    = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html"
+    file    = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html"
 }

 manifest {
@@ -378,8 +390,8 @@ manifest {
     homePage        = 'https://github.com/nf-core/funcscan'
     description     = """Pipeline for screening for functional components of assembled contigs"""
     mainScript      = 'main.nf'
-    nextflowVersion = '!>=22.10.1'
-    version         = '1.1.2'
+    nextflowVersion = '!>=23.04.0'
+    version         = '1.1.3'
     doi             = '10.5281/zenodo.7643099'
 }
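The registry settings added above pin all four container engines to quay.io by default, and per the diff's own comment they only apply when the corresponding engine is enabled. Sites mirroring containers can override them in a run-specific config; a hypothetical example (`registry.example.org` is a placeholder for a local mirror):

    // my_registry.config: hypothetical site override, passed with
    // `nextflow run nf-core/funcscan -c my_registry.config ...`
    apptainer.registry   = 'registry.example.org'
    docker.registry      = 'registry.example.org'
    podman.registry      = 'registry.example.org'
    singularity.registry = 'registry.example.org'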
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 81591e6f..93979e7d 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -15,6 +15,7 @@
         "input": {
             "type": "string",
             "format": "file-path",
+            "exists": true,
             "mimetype": "text/csv",
             "pattern": "^\\S+\\.csv$",
             "schema": "assets/schema_input.json",
@@ -96,7 +97,6 @@
         "properties": {
             "annotation_bakta_db_localpath": {
                 "type": "string",
-                "default": "None",
                 "fa_icon": "fas fa-database",
                 "description": "Specify a path to BAKTA database.",
                 "help_text": "Specify a path to a database that is prepared in a BAKTA format."
@@ -250,7 +250,7 @@
             "default": "Bacteria",
             "fa_icon": "fab fa-accusoft",
             "description": "Specify the kingdom that the input represents.",
-            "help_text": "Specifies the kingdom that the input sample is derived from and/or you wish to screen for\n\n> \u26a0\ufe0f Prokka cannot annotate Eukaryotes.\n\nFor more information please check Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--kingdom`",
+            "help_text": "Specifies the kingdom that the input sample is derived from and/or you wish to screen for\n\n> ⚠️ Prokka cannot annotate Eukaryotes.\n\nFor more information please check Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--kingdom`",
             "enum": ["Archaea", "Bacteria", "Mitochondria", "Viruses"]
         },
         "annotation_prokka_gcode": {
@@ -271,7 +271,7 @@
         },
         "annotation_prokka_evalue": {
             "type": "number",
-            "default": 1e-6,
+            "default": 0.000001,
            "description": "Minimum e-value cut-off.",
            "help_text": "Specifiy the minimum e-value used for filtering the alignment hits.\n\nFor more information please check Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--evalue`",
            "fa_icon": "fas fa-sort-amount-down"
@@ -300,7 +300,6 @@
        "annotation_prokka_centre": {
            "type": "string",
            "description": "Sequencing centre ID.",
-            "default": "None",
            "fa_icon": "fas fa-map-marker-alt",
            "help_text": "Add the sequencing center ID used in generating the raw sequences. This flag is typically requested in combination with the `--compliant` flag when contigs need to be renamed due to non-conforming contig headers. For more information please check Prokka [documentation](https://github.com/tseemann/prokka). \n\n> Modifies tool parameter(s):\n> - Prokka: `--centre`"
        },
@@ -470,7 +469,6 @@
        },
        "amp_hmmsearch_models": {
            "type": "string",
-            "default": "None",
            "description": "Specify path to the AMP hmm model file(s) to search against. Must have quotes if wildcard used.",
            "help_text": "HMMSearch performs biosequence analysis using profile hidden Markov Models.\nThe models are specified in`.hmm` files that are specified with this parameter\n\ne.g. \n\n```\n--amp_hmmsearch_models '////*.hmm'\n```\n\nYou must wrap the path in quotes if you use a wildcard, to ensure Nextflow expansion _not_ bash!\n\nFor more information check HMMER [documentation](http://hmmer.org/).",
            "fa_icon": "fas fa-layer-group"
@@ -521,9 +519,8 @@
        "properties": {
            "amp_ampcombi_db": {
                "type": "string",
-                "description": "Path to AMPcombi reference database directory.",
-                "help_text": "Path to the folder containing the reference database files:\n1. a fasta file with a `.fasta` file extension\n2. the corresponding table with with functional and taxonomic classifications in `.tsv` file extension.\n\nFor more information check AMPcombi [documentation](https://github.com/Darcy220606/AMPcombi).",
-                "default": "None",
+                "description": "Path to AMPcombi reference database directory (DRAMP).",
+                "help_text": "AMPcombi uses the 'general AMPs' dataset of the [DRAMP database](http://dramp.cpu-bioinfor.org/downloads/) for taxonomic classification. If you have a local version of it, you can provide the path to the folder containing the reference database files:\n1. a fasta file with a `.fasta` file extension\n2. the corresponding table with functional and taxonomic classifications in `.tsv` file extension.\n\nFor more information check AMPcombi [documentation](https://github.com/Darcy220606/AMPcombi).",
                "fa_icon": "fas fa-address-book"
            },
            "amp_ampcombi_cutoff": {
@@ -551,7 +548,6 @@
        },
        "arg_amrfinderplus_db": {
            "type": "string",
-            "default": "None",
            "fa_icon": "fas fa-layer-group",
            "help_text": "Specify the path to a local version of the ARMFinderPlus database. If no input is given, the pipeline will download the database for you.\n\n See the nf-core/funcscan usage [documentation](https://nf-co.re/funcscan/usage) for more information.",
            "description": "Specify the path to a local version of the ARMfinderPlus database."
@@ -608,7 +604,6 @@
        },
        "arg_deeparg_data": {
            "type": "string",
-            "default": "None",
            "fa_icon": "fab fa-deezer",
            "description": "Specify the path to the DeepARG database.",
            "help_text": "Specify the path to a local version of the DeepARG database (see the pipelines' usage [documentation](https://nf-co.re/funcscan/usage)). If no input is given, the module will download the database for you, however this is not recommended, as the database is large and this will take time."
@@ -854,14 +849,12 @@
            "type": "string",
            "description": "Path to user-defined local antiSMASH database.",
            "fa_icon": "fas fa-layer-group",
-            "default": "None",
            "help_text": "It is recommend to pre-download the antiSMASH databases to your machine and pass the path of it to this parameter, as this can take a long time to download - particularly when running lots of pipeline runs. \n\nSee the pipeline [documentation](https://nf-co.re/funcscan/usage#antismash) for details on how to download this. If running with docker or singularity, please also check `--bgc_antismash_installationdirectory` for important information."
        },
        "bgc_antismash_installationdirectory": {
            "type": "string",
            "description": "Path to user-defined local antiSMASH directory. Only required when running with docker/singularity.",
            "fa_icon": "far fa-folder-open",
-            "default": "None",
            "help_text": "This is required when running with **docker and singularity** (not required for conda), due to attempted 'modifications' of files during database checks in the installation directory, something that cannot be done in immutable docker/singularity containers.\n\nTherefore, a local installation directory needs to be mounted (including all modified files from the downloading step) to the container as a workaround."
        },
        "bgc_antismash_sampleminlength": {
@@ -869,7 +862,7 @@
            "default": 1000,
            "description": "Minimum longest-contig length a sample must have to be screened with antiSMASH.",
            "fa_icon": "fas fa-ruler-horizontal",
-            "help_text": "This specifies the minimum length that the longest contig must have for the entire sample to be screened by antiSMASH.\n\nAny samples that do not reach this length will be not be sent to antiSMASH, therefore you will not receive output for these samples in your `--outdir`.\n\n> \u26a0\ufe0f This is not the same as `--bgc_antismash_contigminlength`, which specifies to only analyse contigs above that threshold but _within_ a sample that has already passed `--bgc_antismash_sampleminlength` sample filter!",
+            "help_text": "This specifies the minimum length that the longest contig must have for the entire sample to be screened by antiSMASH.\n\nAny samples that do not reach this length will be not be sent to antiSMASH, therefore you will not receive output for these samples in your `--outdir`.\n\n> ⚠️ This is not the same as `--bgc_antismash_contigminlength`, which specifies to only analyse contigs above that threshold but _within_ a sample that has already passed `--bgc_antismash_sampleminlength` sample filter!"
        },
        "bgc_antismash_contigminlength": {
            "type": "integer",
@@ -941,7 +934,6 @@
        },
        "bgc_deepbgc_database": {
            "type": "string",
-            "default": "None",
            "fa_icon": "fas fa-layer-group",
            "description": "Path to local deepBGC database folder."
        },
@@ -1070,7 +1062,6 @@
        },
        "bgc_hmmsearch_models": {
            "type": "string",
-            "default": "None",
            "description": "Specify path to the BGC hmm model file(s) to search against. Must have quotes if wildcard used.",
            "help_text": "HMMSearch performs biosequence analysis using profile hidden Markov Models.\nThe models are specified in`.hmm` files that are specified with this parameter\n\ne.g. \n\n```\n--bgc_hmmsearch_models '////*.hmm'\n```\n\nYou must wrap the path in quotes if you use a wildcard, to ensure Nextflow expansion _not_ bash!\n\nFor more information check HMMER [documentation](http://hmmer.org/).",
            "fa_icon": "fas fa-layer-group"
@@ -1130,6 +1121,7 @@
        "fasta": {
            "type": "string",
            "format": "file-path",
+            "exists": false,
            "mimetype": "text/plain",
            "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
            "description": "Path to FASTA genome file.",
@@ -1231,7 +1223,7 @@
            "description": "Maximum amount of time that can be requested for any single job.",
            "default": "240.h",
            "fa_icon": "far fa-clock",
-            "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$",
+            "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$",
            "hidden": true,
            "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`"
        }
@@ -1302,6 +1294,7 @@
        },
        "multiqc_config": {
            "type": "string",
+            "format": "file-path",
            "description": "Custom config file to supply to MultiQC.",
            "fa_icon": "fas fa-cog",
            "hidden": true
@@ -1317,13 +1310,6 @@
            "description": "Custom MultiQC yaml file containing HTML including a methods description.",
            "fa_icon": "fas fa-cog"
        },
-        "tracedir": {
-            "type": "string",
-            "description": "Directory to keep pipeline Nextflow logs and reports.",
-            "default": "${params.outdir}/pipeline_info",
-            "fa_icon": "fas fa-cogs",
-            "hidden": true
-        },
        "validate_params": {
            "type": "boolean",
            "description": "Boolean whether to validate parameters against the schema at runtime",
@@ -1331,12 +1317,26 @@
            "fa_icon": "fas fa-check-square",
            "hidden": true
        },
-        "show_hidden_params": {
+        "validationShowHiddenParams": {
            "type": "boolean",
            "fa_icon": "far fa-eye-slash",
            "description": "Show all params when using `--help`",
            "hidden": true,
            "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
+        },
+        "validationFailUnrecognisedParams": {
+            "type": "boolean",
+            "fa_icon": "far fa-check-circle",
+            "description": "Validation of parameters fails when an unrecognised parameter is found.",
+            "hidden": true,
+            "help_text": "By default, when an unrecognised parameter is found, it returns a warning."
+        },
+        "validationLenientMode": {
+            "type": "boolean",
+            "fa_icon": "far fa-check-circle",
+            "description": "Validation of parameters in lenient mode.",
+            "hidden": true,
+            "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
        }
    }
}
diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
deleted file mode 100644
index d97e91e7..00000000
--- a/subworkflows/local/input_check.nf
+++ /dev/null
@@ -1,36 +0,0 @@
-//
-// Check input samplesheet and get read channels
-//
-
-include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check'
-
-workflow INPUT_CHECK {
-    take:
-    samplesheet // file: /path/to/samplesheet.csv
-
-    main:
-    SAMPLESHEET_CHECK ( samplesheet )
-        .csv
-        .splitCsv ( header:true, sep:',' )
-        .map { create_input_channels(it) }
-        .set { contigs }
-
-    emit:
-    contigs                                   // channel: [ val(meta), [ fasta ] ]
-    versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
-}
-
-// Function to get list of [ meta, [ fasta ] ]
-def create_input_channels(LinkedHashMap row) {
-    def meta = [:]
-    meta.id = row.sample
-
-    def array = []
-    if (!file(row.fasta).exists()) {
-        error("[funscan] error: please check input samplesheet. FASTA file does not exist for: \n${row.fasta}")
-    } else {
-        array = [ meta, file(row.fasta) ]
-    }
-
-    return array
-}
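The three `validation*` options added to the schema above map directly onto nf-validation behaviour and can be toggled per run, complementing the defaults set in `nextflow.config`. A sketch of a custom config that relaxes validation (illustrative values, not recommendations):

    // relaxed_validation.config: hypothetical user config, passed with -c
    params {
        validationLenientMode            = true   // accept "1000" where an integer is expected
        validationFailUnrecognisedParams = false  // unknown params warn instead of failing the run
        validationSchemaIgnoreParams     = 'genomes'
    }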
diff --git a/workflows/funcscan.nf b/workflows/funcscan.nf
index 27168b24..e46c6c2b 100644
--- a/workflows/funcscan.nf
+++ b/workflows/funcscan.nf
@@ -1,40 +1,33 @@
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    VALIDATE INPUTS
+    PRINT PARAMS SUMMARY
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
+include { validateParameters; paramsHelp; paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation'

-def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)
+include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation'
+
+def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
+def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
+def summary_params = paramsSummaryMap(workflow)
+
+// Print parameter summary log to screen
+log.info logo + paramsSummaryLog(workflow) + citation

-// Validate input parameters
 WorkflowFuncscan.initialise(params, log)

 // Check input path parameters to see if they exist
-def checkPathParamList = [ params.input, params.multiqc_config, params.annotation_bakta_db_localpath,
+/*def checkPathParamList = [ params.input, params.multiqc_config, params.annotation_bakta_db_localpath,
                            params.amp_hmmsearch_models, params.amp_ampcombi_db, params.arg_amrfinderplus_db,
                            params.arg_deeparg_data, params.bgc_antismash_databases, params.bgc_antismash_installationdirectory,
                            params.bgc_deepbgc_database, params.bgc_hmmsearch_models ]
 for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }

 // Check mandatory parameters
 if (params.input) { ch_input = file(params.input) } else { error("Input samplesheet not specified!") }
-
-// Validate fARGene inputs
-// Split input into array, find the union with our valid classes, extract only
-// invalid classes, and if they exist, exit. Note `tokenize` used here as this
-// works for `interesect` and other groovy functions, but require `split` for
-// `Channel.of` creation. See `arg.nf` for latter.
-def fargene_classes = params.arg_fargene_hmmmodel
-def fargene_valid_classes = [ "class_a", "class_b_1_2", "class_b_3",
-                              "class_c", "class_d_1", "class_d_2",
-                              "qnr", "tet_efflux", "tet_rpg", "tet_enzyme"
-                            ]
-def fargene_user_classes = fargene_classes.tokenize(',')
-def fargene_classes_valid = fargene_user_classes.intersect( fargene_valid_classes )
-def fargene_classes_missing = fargene_user_classes - fargene_classes_valid
-
-if ( fargene_classes_missing.size() > 0 ) error("[nf-core/funcscan] ERROR: invalid class present in --arg_fargene_hmmodel. Please check input. Invalid class: ${fargene_classes_missing.join(', ')}")
+*/

 // Validate antiSMASH inputs
 // 1. Make sure that either both or none of the antiSMASH directories are supplied
@@ -71,8 +64,6 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil
 //
 // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
 //
-include { INPUT_CHECK } from '../subworkflows/local/input_check'
-
 include { AMP } from '../subworkflows/local/amp'
 include { ARG } from '../subworkflows/local/arg'
 include { BGC } from '../subworkflows/local/bgc'
@@ -117,16 +108,10 @@ workflow FUNCSCAN {
     ch_versions = Channel.empty()
     ch_multiqc_logo = Channel.fromPath("$projectDir/docs/images/nf-core-funcscan_logo_flat_light.png")

-    //
-    // SUBWORKFLOW: Read in samplesheet, validate and stage input files
-    //
-    INPUT_CHECK (
-        ch_input
-    )
-    ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)
+    ch_input = Channel.fromSamplesheet("input")

     // Some tools require uncompressed input
-    fasta_prep = INPUT_CHECK.out.contigs
+    fasta_prep = ch_input
         .branch {
             compressed: it[1].toString().endsWith('.gz')
             uncompressed: it[1]
@@ -184,8 +169,8 @@ workflow FUNCSCAN {
         GUNZIP_PYRODIGAL_GFF ( PYRODIGAL.out.gff )
         ch_versions = ch_versions.mix(PYRODIGAL.out.versions)
         ch_annotation_faa = GUNZIP_PYRODIGAL_FAA.out.gunzip
-        ch_annotation_fna = GUNZIP_PYRODIGAL_FAA.out.gunzip
-        ch_annotation_gff = GUNZIP_PYRODIGAL_FAA.out.gunzip
+        ch_annotation_fna = GUNZIP_PYRODIGAL_FNA.out.gunzip
+        ch_annotation_gff = GUNZIP_PYRODIGAL_GFF.out.gunzip
         ch_annotation_gbk = Channel.empty() // Pyrodigal doesn't produce GBK
     } else if ( params.annotation_tool == "prokka" ) {
         PROKKA ( ch_prepped_input, [], [] )
@@ -232,7 +217,15 @@ workflow FUNCSCAN {
         AMPs
     */
     if ( params.run_amp_screening ) {
-        AMP ( ch_prepped_input, ch_annotation_faa )
+        AMP (
+            ch_prepped_input,
+            ch_annotation_faa
+                .filter {
+                    meta, file ->
+                        if ( file.isEmpty() ) log.warn("Annotation of the following sample produced an empty FAA file. AMP screening tools requiring this file will not be executed: ${meta.id}")
+                        !file.isEmpty()
+                }
+        )
         ch_versions = ch_versions.mix(AMP.out.versions)
     }
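The AMP hunk above, and the ARG and BGC hunks that follow, guard each annotation channel with the same empty-file filter. Factored out, the pattern could look like the sketch below (`warnAndDrop` is a hypothetical helper name, not part of the pipeline):

    // Hypothetical helper: drop [meta, file] tuples whose file is empty,
    // warning once per affected sample; returns the filtered channel.
    def warnAndDrop(ch, filetype, toolset) {
        ch.filter { meta, file ->
            if (file.isEmpty()) log.warn("Annotation of the following sample produced an empty ${filetype} file. ${toolset} screening tools requiring this file will not be executed: ${meta.id}")
            !file.isEmpty()
        }
    }

Usage mirroring the calls in the diff would then read, for example, `ARG( ch_prepped_input, warnAndDrop(ch_annotation_faa, 'FAA', 'ARG') )`.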
@@ -243,7 +236,15 @@ workflow FUNCSCAN {
     if (params.arg_skip_deeparg) {
         ARG ( ch_prepped_input, [] )
     } else {
-        ARG ( ch_prepped_input, ch_annotation_faa )
+        ARG (
+            ch_prepped_input,
+            ch_annotation_faa
+                .filter {
+                    meta, file ->
+                        if ( file.isEmpty() ) log.warn("Annotation of the following sample produced an empty FAA file. ARG screening tools requiring this file will not be executed: ${meta.id}")
+                        !file.isEmpty()
+                }
+        )
     }
     ch_versions = ch_versions.mix(ARG.out.versions)
     }
@@ -252,7 +253,27 @@ workflow FUNCSCAN {
        BGCs
    */
    if ( params.run_bgc_screening ) {
-        BGC ( ch_prepped_input, ch_annotation_gff, ch_annotation_faa, ch_annotation_gbk )
+        BGC (
+            ch_prepped_input,
+            ch_annotation_gff
+                .filter {
+                    meta, file ->
+                        if ( file.isEmpty() ) log.warn("Annotation of the following sample produced an empty GFF file. BGC screening tools requiring this file will not be executed: ${meta.id}")
+                        !file.isEmpty()
+                },
+            ch_annotation_faa
+                .filter {
+                    meta, file ->
+                        if ( file.isEmpty() ) log.warn("Annotation of the following sample produced an empty FAA file. BGC screening tools requiring this file will not be executed: ${meta.id}")
+                        !file.isEmpty()
+                },
+            ch_annotation_gbk
+                .filter {
+                    meta, file ->
+                        if ( file.isEmpty() ) log.warn("Annotation of the following sample produced an empty GBK file. BGC screening tools requiring this file will not be executed: ${meta.id}")
+                        !file.isEmpty()
+                }
+        )
         ch_versions = ch_versions.mix(BGC.out.versions)
     }

@@ -266,13 +287,14 @@ workflow FUNCSCAN {
     workflow_summary    = WorkflowFuncscan.paramsSummaryMultiqc(workflow, summary_params)
     ch_workflow_summary = Channel.value(workflow_summary)

-    methods_description    = WorkflowFuncscan.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description)
+    methods_description    = WorkflowFuncscan.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params)
     ch_methods_description = Channel.value(methods_description)

     ch_multiqc_files = Channel.empty()
     ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
     ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
     ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
+    if(params.annotation_tool=='prokka'){ch_multiqc_files = ch_multiqc_files.mix( PROKKA.out.txt.collect{it[1]}.ifEmpty([])) }

     MULTIQC (
         ch_multiqc_files.collect(),