From c285a03900d5d8420f0a9da7cec1e161bd3f51c2 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Thu, 4 Aug 2022 14:39:35 +0200 Subject: [PATCH 01/78] bump version --- CHANGELOG.md | 8 ++++++++ nextflow.config | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9c57e5..07bed90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v2.2.0dev - [name] - [date] + +### `Added` + +### `Changed` + +### `Fixed` + ## v2.1.0 - Nordring - 2022-08-02 ### `Added` diff --git a/nextflow.config b/nextflow.config index 0d812cb..87c873a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -213,7 +213,7 @@ manifest { description = 'A fully reproducible and state of the art epitope prediction pipeline.' mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' - version = '2.1.0' + version = '2.2.0dev' } // Function to ensure that resource requirements don't go beyond From 30d763e7adefb11e569596bdc62564d9bae16a23 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 30 Aug 2022 13:31:12 +0000 Subject: [PATCH 02/78] Template update for nf-core/tools version 2.5 --- .editorconfig | 2 +- .github/PULL_REQUEST_TEMPLATE.md | 3 +- .github/workflows/ci.yml | 23 ++------ .github/workflows/linting.yml | 38 +++++++++++-- CHANGELOG.md | 2 +- CITATION.cff | 56 +++++++++++++++++++ README.md | 21 +++---- assets/email_template.txt | 1 - bin/check_samplesheet.py | 41 +++++++------- conf/base.config | 5 ++ docs/usage.md | 12 ++-- lib/WorkflowEpitopeprediction.groovy | 5 +- lib/WorkflowMain.groovy | 9 ++- main.nf | 2 +- modules.json | 22 +++++--- .../templates/dumpsoftwareversions.py | 14 +++-- nextflow.config | 23 +++++++- 17 files changed, 186 insertions(+), 93 deletions(-) create mode 100644 CITATION.cff diff --git a/.editorconfig b/.editorconfig index b6b3190..b78de6e 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,html,css,scss,js}] +[*.{md,yml,yaml,html,css,scss,js,cff}] indent_size = 2 # These files are edited and tested upstream in nf-core/modules diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 6cf49e7..e530977 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,8 +15,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/epit - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/epitopeprediction/tree/master/.github/CONTRIBUTING.md) - - [ ] If necessary, also make a PR on the nf-core/epitopeprediction _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/epitopeprediction/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/epitopeprediction _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). 
- [ ] Usage Documentation in `docs/usage.md` is updated. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0dbd4dc..a4205db 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,6 @@ on: env: NXF_ANSI_LOG: false - CAPSULE_LOG: none jobs: test: @@ -20,27 +19,17 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - # Nextflow versions - include: - # Test pipeline minimum Nextflow version - - NXF_VER: "21.10.3" - NXF_EDGE: "" - # Test latest edge release of Nextflow - - NXF_VER: "" - NXF_EDGE: "1" + NXF_VER: + - "21.10.3" + - "latest-everything" steps: - name: Check out pipeline code uses: actions/checkout@v2 - name: Install Nextflow - env: - NXF_VER: ${{ matrix.NXF_VER }} - # Uncomment only if the edge release is more recent than the latest stable release - # See https://github.com/nextflow-io/nextflow/issues/2467 - # NXF_EDGE: ${{ matrix.NXF_EDGE }} - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" - name: Run pipeline with test data # TODO nf-core: You can customise CI pipeline run tests as required diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 77358de..8a5ce69 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -35,6 +35,36 @@ jobs: - name: Run Prettier --check run: prettier --check ${GITHUB_WORKSPACE} + PythonBlack: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Check code lints with Black + uses: psf/black@stable + + # If the above check failed, post a comment on the PR explaining the failure + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + ## Python linting (`black`) is failing + + To keep the code consistent with lots of contributors, we run automated code consistency checks. + To fix this CI test, please run: + + * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` + * Fix formatting errors in your pipeline: `black .` + + Once you push these changes the test should pass, and you can hide this comment :+1: + + We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + + Thanks again for your contribution! + repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false + nf-core: runs-on: ubuntu-latest steps: @@ -42,15 +72,11 @@ jobs: uses: actions/checkout@v2 - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 - uses: actions/setup-python@v3 with: - python-version: "3.6" + python-version: "3.7" architecture: "x64" - name: Install dependencies diff --git a/CHANGELOG.md b/CHANGELOG.md index b13d368..a3ccf00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.1.0dev - [date] +## v2.2.0dev - [date] Initial release of nf-core/epitopeprediction, created with the [nf-core](https://nf-co.re/) template. 
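Note that this template update also rewrites the bare `assert` statements in the bundled Python helper scripts as explicit `raise AssertionError(...)` calls (visible in the `bin/check_samplesheet.py` and `dumpsoftwareversions.py` hunks below), presumably because bare asserts are silently stripped when Python runs with `-O`. A minimal sketch of the pattern, using a hypothetical `validate_sample` helper rather than the pipeline's actual code:

```python
# Sketch of the assert -> raise AssertionError conversion applied across the
# template update; validate_sample is a hypothetical stand-in for the
# RowChecker methods touched in the hunks below.

def validate_sample(sample: str) -> str:
    # Before: assert len(sample) > 0, "Sample input is required."
    # After: an explicit raise, which still fires under `python -O`:
    if len(sample) <= 0:
        raise AssertionError("Sample input is required.")
    return sample.replace(" ", "_")  # sanitize spaces, as the checker does
```
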
diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..4533e2f --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,56 @@ +cff-version: 1.2.0 +message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication" +authors: + - family-names: Ewels + given-names: Philip + - family-names: Peltzer + given-names: Alexander + - family-names: Fillinger + given-names: Sven + - family-names: Patel + given-names: Harshil + - family-names: Alneberg + given-names: Johannes + - family-names: Wilm + given-names: Andreas + - family-names: Ulysse Garcia + given-names: Maxime + - family-names: Di Tommaso + given-names: Paolo + - family-names: Nahnsen + given-names: Sven +title: "The nf-core framework for community-curated bioinformatics pipelines." +version: 2.4.1 +doi: 10.1038/s41587-020-0439-x +date-released: 2022-05-16 +url: https://github.com/nf-core/tools +prefered-citation: + type: article + authors: + - family-names: Ewels + given-names: Philip + - family-names: Peltzer + given-names: Alexander + - family-names: Fillinger + given-names: Sven + - family-names: Patel + given-names: Harshil + - family-names: Alneberg + given-names: Johannes + - family-names: Wilm + given-names: Andreas + - family-names: Ulysse Garcia + given-names: Maxime + - family-names: Di Tommaso + given-names: Paolo + - family-names: Nahnsen + given-names: Sven + doi: 10.1038/s41587-020-0439-x + journal: nature biotechnology + start: 276 + end: 278 + title: "The nf-core framework for community-curated bioinformatics pipelines." + issue: 3 + volume: 38 + year: 2020 + url: https://dx.doi.org/10.1038/s41587-020-0439-x diff --git a/README.md b/README.md index 90574ea..61a86fa 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,14 @@ # ![nf-core/epitopeprediction](docs/images/nf-core-epitopeprediction_logo_light.png#gh-light-mode-only) ![nf-core/epitopeprediction](docs/images/nf-core-epitopeprediction_logo_dark.png#gh-dark-mode-only) -[![GitHub Actions CI Status](https://github.com/nf-core/epitopeprediction/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/epitopeprediction/actions?query=workflow%3A%22nf-core+CI%22) -[![GitHub Actions Linting Status](https://github.com/nf-core/epitopeprediction/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/epitopeprediction/actions?query=workflow%3A%22nf-core+linting%22) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/epitopeprediction/results) -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/epitopeprediction/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/) -[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?logo=docker)](https://www.docker.com/) -[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg)](https://sylabs.io/docs/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with 
docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/epitopeprediction) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23epitopeprediction-4A154B?logo=slack)](https://nfcore.slack.com/channels/epitopeprediction) -[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?logo=twitter)](https://twitter.com/nf_core) -[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23epitopeprediction-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/epitopeprediction)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction @@ -25,7 +20,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool -On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/epitopeprediction/results). +On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources.The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/epitopeprediction/results). ## Pipeline summary @@ -42,7 +37,7 @@ On release, automated continuous integration tests run the pipeline on a full-si 3. 
Download the pipeline and test it on a minimal dataset with a single command: - ```console + ```bash nextflow run nf-core/epitopeprediction -profile test,YOURPROFILE --outdir ``` @@ -57,7 +52,7 @@ On release, automated continuous integration tests run the pipeline on a full-si - ```console + ```bash nextflow run nf-core/epitopeprediction --input samplesheet.csv --outdir --genome GRCh37 -profile ``` diff --git a/assets/email_template.txt b/assets/email_template.txt index 945a397..8b39053 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -6,7 +6,6 @@ `._,._,' nf-core/epitopeprediction v${version} ---------------------------------------------------- - Run Name: $runName <% if (success){ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 3652c63..9a8b896 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -11,7 +11,6 @@ from collections import Counter from pathlib import Path - logger = logging.getLogger() @@ -79,13 +78,15 @@ def validate_and_transform(self, row): def _validate_sample(self, row): """Assert that the sample name exists and convert spaces to underscores.""" - assert len(row[self._sample_col]) > 0, "Sample input is required." + if len(row[self._sample_col]) <= 0: + raise AssertionError("Sample input is required.") # Sanitize samples slightly. row[self._sample_col] = row[self._sample_col].replace(" ", "_") def _validate_first(self, row): """Assert that the first FASTQ entry is non-empty and has the right format.""" - assert len(row[self._first_col]) > 0, "At least the first FASTQ file is required." + if len(row[self._first_col]) <= 0: + raise AssertionError("At least the first FASTQ file is required.") self._validate_fastq_format(row[self._first_col]) def _validate_second(self, row): @@ -97,36 +98,34 @@ def _validate_pair(self, row): """Assert that read pairs have the same file extension. Report pair status.""" if row[self._first_col] and row[self._second_col]: row[self._single_col] = False - assert ( - Path(row[self._first_col]).suffixes[-2:] == Path(row[self._second_col]).suffixes[-2:] - ), "FASTQ pairs must have the same file extensions." + if Path(row[self._first_col]).suffixes[-2:] != Path(row[self._second_col]).suffixes[-2:]: + raise AssertionError("FASTQ pairs must have the same file extensions.") else: row[self._single_col] = True def _validate_fastq_format(self, filename): """Assert that a given filename has one of the expected FASTQ extensions.""" - assert any(filename.endswith(extension) for extension in self.VALID_FORMATS), ( - f"The FASTQ file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) + if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): + raise AssertionError( + f"The FASTQ file has an unrecognized extension: {filename}\n" + f"It should be one of: {', '.join(self.VALID_FORMATS)}" + ) def validate_unique_samples(self): """ Assert that the combination of sample name and FASTQ filename is unique. - In addition to the validation, also rename the sample if more than one sample, - FASTQ file combination exists. + In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the + number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. """ - assert len(self._seen) == len(self.modified), "The pair of sample name and FASTQ must be unique." 
- if len({pair[0] for pair in self._seen}) < len(self._seen): - counts = Counter(pair[0] for pair in self._seen) - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - if counts[sample] > 1: - row[self._sample_col] = f"{sample}_T{seen[sample]}" + if len(self._seen) != len(self.modified): + raise AssertionError("The pair of sample name and FASTQ must be unique.") + seen = Counter() + for row in self.modified: + sample = row[self._sample_col] + seen[sample] += 1 + row[self._sample_col] = f"{sample}_T{seen[sample]}" def read_head(handle, num_lines=10): diff --git a/conf/base.config b/conf/base.config index 01fb3d0..71f5f2f 100644 --- a/conf/base.config +++ b/conf/base.config @@ -26,6 +26,11 @@ process { // adding in your local modules too. // TODO nf-core: Customise requirements for specific processes. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } withLabel:process_low { cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } diff --git a/docs/usage.md b/docs/usage.md index 30feaaa..9ad2b37 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -12,7 +12,7 @@ You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. -```console +```bash --input '[path to samplesheet file]' ``` @@ -56,7 +56,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p The typical command for running the pipeline is as follows: -```console +```bash nextflow run nf-core/epitopeprediction --input samplesheet.csv --outdir --genome GRCh37 -profile docker ``` @@ -64,9 +64,9 @@ This will launch the pipeline with the `docker` configuration profile. See below Note that the pipeline will create the following files in your working directory: -```console +```bash work # Directory containing the nextflow working files - # Finished results in specified location (defined with --outdir) + # Finished results in specified location (defined with --outdir) .nextflow_log # Log file from Nextflow # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` @@ -75,7 +75,7 @@ work # Directory containing the nextflow working files When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: -```console +```bash nextflow pull nf-core/epitopeprediction ``` @@ -251,6 +251,6 @@ Some HPC setups also allow you to run nextflow within a cluster job submitted yo In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. 
We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`): -```console +```bash NXF_OPTS='-Xms1g -Xmx4g' ``` diff --git a/lib/WorkflowEpitopeprediction.groovy b/lib/WorkflowEpitopeprediction.groovy index ad1dfb2..c209f5f 100755 --- a/lib/WorkflowEpitopeprediction.groovy +++ b/lib/WorkflowEpitopeprediction.groovy @@ -10,6 +10,7 @@ class WorkflowEpitopeprediction { public static void initialise(params, log) { genomeExistsError(params, log) + if (!params.fasta) { log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." System.exit(1) @@ -41,9 +42,7 @@ class WorkflowEpitopeprediction { yaml_file_text += "data: |\n" yaml_file_text += "${summary_section}" return yaml_file_text - } - - // + }// // Exit pipeline if incorrect --genome key provided // private static void genomeExistsError(params, log) { diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index c9abb65..86a4ecf 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -59,6 +59,7 @@ class WorkflowMain { } // Print parameter summary log to screen + log.info paramsSummaryLog(workflow, params, log) // Check that a -profile or Nextflow config has been provided to run the pipeline @@ -78,17 +79,15 @@ class WorkflowMain { System.exit(1) } } - // // Get attribute from genome config file e.g. fasta // - public static String getGenomeAttribute(params, attribute) { - def val = '' + public static Object getGenomeAttribute(params, attribute) { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { if (params.genomes[ params.genome ].containsKey(attribute)) { - val = params.genomes[ params.genome ][ attribute ] + return params.genomes[ params.genome ][ attribute ] } } - return val + return null } } diff --git a/main.nf b/main.nf index e92042e..4b254aa 100644 --- a/main.nf +++ b/main.nf @@ -4,7 +4,7 @@ nf-core/epitopeprediction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/epitopeprediction - Website: https://nf-co.re/epitopeprediction +Website: https://nf-co.re/epitopeprediction Slack : https://nfcore.slack.com/channels/epitopeprediction ---------------------------------------------------------------------------------------- */ diff --git a/modules.json b/modules.json index 3a01c7e..75f3e7e 100644 --- a/modules.json +++ b/modules.json @@ -3,14 +3,20 @@ "homePage": "https://github.com/nf-core/epitopeprediction", "repos": { "nf-core/modules": { - "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "fastqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "multiqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_url": "https://github.com/nf-core/modules.git", + "modules": { + "custom/dumpsoftwareversions": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", + "branch": "master" + }, + "fastqc": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", + "branch": "master" + }, + "multiqc": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", + "branch": "master" + } } } } diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index d139039..787bdb7 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ 
b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -1,9 +1,10 @@ #!/usr/bin/env python -import yaml import platform from textwrap import dedent +import yaml + def _make_versions_html(versions): html = [ @@ -58,11 +59,12 @@ def _make_versions_html(versions): for process, process_versions in versions_by_process.items(): module = process.split(":")[-1] try: - assert versions_by_module[module] == process_versions, ( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) except KeyError: versions_by_module[module] = process_versions diff --git a/nextflow.config b/nextflow.config index 6bb3ca8..318b381 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,11 +13,11 @@ params { // Input options input = null + // References genome = null igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false - // MultiQC options multiqc_config = null multiqc_title = null @@ -37,6 +37,7 @@ params { schema_ignore_params = 'genomes' enable_conda = false + // Config options custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" @@ -45,6 +46,7 @@ params { config_profile_url = null config_profile_name = null + // Max resource options // Defaults only, expecting to be overwritten max_memory = '128.GB' @@ -72,6 +74,7 @@ try { // } + profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { @@ -82,6 +85,15 @@ profiles { shifter.enabled = false charliecloud.enabled = false } + mamba { + params.enable_conda = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } docker { docker.enabled = true docker.userEmulation = true @@ -119,10 +131,16 @@ profiles { podman.enabled = false shifter.enabled = false } + gitpod { + executor.name = 'local' + executor.cpus = 16 + executor.memory = 60.GB + } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } } + // Load igenomes.config if required if (!params.igenomes_ignore) { includeConfig 'conf/igenomes.config' @@ -130,6 +148,7 @@ if (!params.igenomes_ignore) { params.genomes = [:] } + // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -169,7 +188,7 @@ manifest { description = 'A fully reproducible and state of the art epitope prediction pipeline.' 
mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' - version = '2.1.0dev' + version = '2.2.0dev' } // Load modules.config for DSL2 module specific options From 482809c5ee17be2d04b98d9ac834a00c779743c6 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Wed, 31 Aug 2022 18:14:38 +0200 Subject: [PATCH 03/78] add functionality for processing VEP annotations --- bin/epaa.py | 229 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 146 insertions(+), 83 deletions(-) diff --git a/bin/epaa.py b/bin/epaa.py index 92296b6..4c82060 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -11,6 +11,7 @@ import itertools import pandas as pd import numpy as np + import epytope.Core.Generator as generator import math import json @@ -64,10 +65,8 @@ def get_epytope_annotation(vt, p, r, alt): alternative = str(alt) else: return p, r, alt - return position, reference, alternative - def check_min_req_GSvar(row): """ checking the presence of mandatory columns @@ -209,17 +208,31 @@ def read_vcf(filename, pass_only=True): """ global ID_SYSTEM_USED + vep_header_available = False + # default VEP fields + vep_fields = {"allele": 0,"consequence": 1,"impact": 2,"symbol": 3,"gene": 4,"feature_type": 5,"feature": 6,"biotype": 7,"exon": 8,"intron": 9,"hgvsc": 10,"hgvsp": 11,"cdna_position": 12,"cds_position": 13, "protein_position": 14,"amino_acids": 15,"codons": 16,"existing_variation": 17,"distance": 18,"strand": 19,"flags": 20,"symbol_source": 21,"hgnc_id":22} + + VEP_KEY = "CSQ" + SNPEFF_KEY = "ANN" + vl = list() with open(filename, 'rt') as tsvfile: vcf_reader = vcf.Reader(tsvfile) vl = [r for r in vcf_reader] # list of mandatory (meta)data - exclusion_list = ["ANN"] + exclusion_list = ["ANN", "CSQ"] # DB identifier of variants inclusion_list = ["vardbid"] + # determine format of given VEP annotation + if VEP_KEY in vcf_reader.infos: + split_vep_def = vcf_reader.infos[VEP_KEY] + for idx, field in enumerate(split_vep_def.desc.split()[-1].split('|')): + vep_fields[field.strip().lower()] = idx + vep_header_available = True + # get lists of additional metadata metadata_list = set(vcf_reader.infos.keys()) - set(exclusion_list) metadata_list.update(set(inclusion_list)) @@ -232,24 +245,32 @@ def read_vcf(filename, pass_only=True): genotye_dict = {"het": False, "hom": True, "ref": True} for num, record in enumerate(vl): - c = record.CHROM.strip('chr') - p = record.POS - 1 + chr = record.CHROM.strip('chr') + genomic_position = record.POS variation_dbid = record.ID - r = str(record.REF) - v_list = record.ALT - f = record.FILTER + reference = str(record.REF) + alternative_list = record.ALT + filter = record.FILTER - if pass_only and f: + if pass_only and filter: continue """ Enum for variation types: type.SNP, type.DEL, type.INS, type.FSDEL, type.FSINS, type.UNKNOWN + + VARIANT INCORP IN EPYTOPE + + SNP => seq[pos] = OBS (replace) + INSERTION => seqp[pos:pos] = obs (insert at that position) + DELETION => s = slice(pos, pos+len(ref)) (create slice that will be removed) + del seq[s] (remove) """ vt = VariationType.UNKNOWN if record.is_snp: vt = VariationType.SNP elif record.is_indel: + #@TODO Potential bug here if v_list is really list if len(v_list) % 3 == 0: # no frameshift if record.is_deletion: vt = VariationType.DEL @@ -262,7 +283,7 @@ def read_vcf(filename, pass_only=True): vt = VariationType.FSINS gene = '' - for alt in v_list: + for alt in alternative_list: isHomozygous = False if 'HOM' in record.INFO: isHomozygous = record.INFO['HOM'] == 1 @@ -280,81 +301,123 @@ def read_vcf(filename, pass_only=True): if 
'GT' in sample.data: isHomozygous = sample.data['GT'] == '1/1' - if record.INFO['ANN']: + # check if we have SNPEFF or VEP annotated variants, otherwise abort + if record.INFO.get(SNPEFF_KEY, False) or record.INFO.get(VEP_KEY, False): isSynonymous = False coding = dict() types = [] - # for each ANN only add a new coding! see GSvar - for annraw in record.INFO['ANN']: - annots = annraw.split('|') - if len(annots) != 16: - logger.warning( - "read_vcf: Omitted row! Mandatory columns not present in annotation field (ANN). \n Have you annotated your VCF file with SnpEff?") - continue - obs, a_mut_type, impact, a_gene, a_gene_id, feature_type, transcript_id, exon, tot_exon, trans_coding, prot_coding, cdna, cds, aa, distance, warnings = annots - types.append(a_mut_type) - - tpos = 0 - ppos = 0 - positions = '' - - # get cds/protein positions and convert mutation syntax to epytope format - if trans_coding != '': - positions = re.findall(r'\d+', trans_coding) - ppos = int(positions[0]) - 1 - - if prot_coding != '': - positions = re.findall(r'\d+', prot_coding) - tpos = int(positions[0]) - 1 - - isSynonymous = (a_mut_type == "synonymous_variant") - - gene = a_gene_id - # there are no isoforms in biomart - transcript_id = transcript_id.split(".")[0] - - if 'NM' in transcript_id: - ID_SYSTEM_USED = EIdentifierTypes.REFSEQ - - # take online coding variants into account, epytope cannot deal with stopgain variants right now - if not prot_coding or 'stop_gained' in a_mut_type: - continue - - coding[transcript_id] = MutationSyntax( - transcript_id, ppos, tpos, trans_coding, prot_coding) - transcript_ids.append(transcript_id) - - if coding: - pos, reference, alternative = get_epytope_annotation( - vt, p, r, str(alt)) - var = Variant("line" + str(num), vt, c, pos, reference, - alternative, coding, isHomozygous, isSynonymous) - var.gene = gene - var.log_metadata("vardbid", variation_dbid) - final_metadata_list.append("vardbid") - for metadata_name in metadata_list: - if metadata_name in record.INFO: - final_metadata_list.append(metadata_name) - var.log_metadata( - metadata_name, record.INFO[metadata_name]) - - for sample in record.samples: - for format_key in format_list: - if getattr(sample.data, format_key, None) is None: - logger.warning("FORMAT entry {entry} not defined for {genotype}. Skipping.".format( - entry=format_key, genotype=sample.sample)) - continue - format_header = '{}.{}'.format( - sample.sample, format_key) - final_metadata_list.append(format_header) - if isinstance(sample[format_key], list): - format_value = ','.join( - [str(i) for i in sample[format_key]]) - else: - format_value = sample[format_key] - var.log_metadata(format_header, format_value) - dict_vars[var] = var - list_vars.append(var) + # SNPEFF annotation + if SNPEFF_KEY in record.INFO: + for annraw in record.INFO[SNPEFF_KEY]: + annots = annraw.split('|') + if len(annots) != 16: + logger.warning( + "read_vcf: Omitted row! Mandatory columns not present in annotation field (ANN). 
\n Have you annotated your VCF file with SnpEff?") + continue + obs, a_mut_type, impact, a_gene, a_gene_id, feature_type, transcript_id, exon, tot_exon, trans_coding, prot_coding, cdna, cds, aa, distance, warnings = annots + types.append(a_mut_type) + + tpos = 0 + ppos = 0 + positions = '' + + # get cds/protein positions and convert mutation syntax to epytope format + if trans_coding != '': + positions = re.findall(r'\d+', trans_coding) + ppos = int(positions[0]) - 1 + + if prot_coding != '': + positions = re.findall(r'\d+', prot_coding) + tpos = int(positions[0]) - 1 + + isSynonymous = (a_mut_type == "synonymous_variant") + + gene = a_gene_id + # there are no isoforms in biomart + transcript_id = transcript_id.split(".")[0] + + if 'NM' in transcript_id: + ID_SYSTEM_USED = EIdentifierTypes.REFSEQ + + # take online coding variants into account, epytope cannot deal with stop gain variants right now + if not prot_coding or 'stop_gained' in a_mut_type: + continue + + coding[transcript_id] = MutationSyntax( + transcript_id, ppos, tpos, trans_coding, prot_coding) + transcript_ids.append(transcript_id) + else: + if not vep_header_available: + logger.warning("No CSQ definition found in header, trying to map to default VEP format string.") + for annotation in record.INFO[VEP_KEY]: + split_annotation = annotation.split('|') + isSynonymous = 'synonymous' in split_annotation[vep_fields['consequence']] + gene = split_annotation[vep_fields['gene']] + c_coding = split_annotation[vep_fields["hgvsc"]] + p_coding = split_annotation[vep_fields["hgvsp"]] + cds_pos = split_annotation[vep_fields["cds_position"]] + # not sure yet if this is always the case + if cds_pos: + """ + https://varnomen.hgvs.org/recommendations/general/ + “c.” for a coding DNA reference sequence + “g.” for a linear genomic reference sequence + “m.” for a mitochondrial DNA reference sequence + “n.” for a non-coding DNA reference sequence + “o.” for a circular genomic reference sequence + “p.” for a protein reference sequence + “r.” for an RNA reference sequence (transcript) + + We could filter for coding and genomic here. + """ + ppos = -1 + prot_coding = "" + split_coding_c = c_coding.split(':') + split_coding_p = p_coding.split(':') + # we still need the new functionality here in epytope to query with IDs with version (ENTxxx.x) + transcript_id = split_coding_c[0] if split_coding_c[0] else split_annotation[vep_fields["feature"]] + transcript_id = transcript_id.split('.')[0] + + tpos = int(cds_pos.split('/')[0].split('-')[0]) - 1 + if split_annotation[vep_fields["protein_position"]]: + ppos = int(split_annotation[vep_fields["protein_position"]].split('-')[0].split('/')[0]) - 1 + + coding[transcript_id] = MutationSyntax( + transcript_id, tpos, ppos, split_coding_c[-1], split_coding_p[-1]) + transcript_ids.append(transcript_id) + if coding: + pos, reference, alternative = get_epytope_annotation( + vt, genomic_position, reference, str(alt)) + var = Variant("line" + str(num), vt, chr, pos, reference, + alternative, coding, isHomozygous, isSynonymous) + var.gene = gene + var.log_metadata("vardbid", variation_dbid) + final_metadata_list.append("vardbid") + for metadata_name in metadata_list: + if metadata_name in record.INFO: + final_metadata_list.append(metadata_name) + var.log_metadata( + metadata_name, record.INFO[metadata_name]) + for sample in record.samples: + for format_key in format_list: + if getattr(sample.data, format_key, None) is None: + logger.warning("FORMAT entry {entry} not defined for {genotype}. 
Skipping.".format( + entry=format_key, genotype=sample.sample)) + continue + format_header = '{}.{}'.format( + sample.sample, format_key) + final_metadata_list.append(format_header) + if isinstance(sample[format_key], list): + format_value = ','.join( + [str(i) for i in sample[format_key]]) + else: + format_value = sample[format_key] + var.log_metadata(format_header, format_value) + dict_vars[var] = var + list_vars.append(var) + else: + logger.error("No supported variant annotation string found. Aborting.") + sys.exit(1) transToVar = {} @@ -1071,7 +1134,7 @@ def __main__(): metadata = [] proteins = [] references = {'GRCh37': 'http://feb2014.archive.ensembl.org', - 'GRCh38': 'http://mar2017.archive.ensembl.org'} + 'GRCh38': 'http://aug2017.archive.ensembl.org'} global transcriptProteinMap global transcriptSwissProtMap From 892d00ee3364bdc3bed62bb9fa1bade5cc26f1b4 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 1 Sep 2022 13:26:21 +0000 Subject: [PATCH 04/78] Template update for nf-core/tools version 2.5.1 --- bin/check_samplesheet.py | 9 ++++++--- pyproject.toml | 10 ++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 pyproject.toml diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 9a8b896..11b1557 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -98,7 +98,9 @@ def _validate_pair(self, row): """Assert that read pairs have the same file extension. Report pair status.""" if row[self._first_col] and row[self._second_col]: row[self._single_col] = False - if Path(row[self._first_col]).suffixes[-2:] != Path(row[self._second_col]).suffixes[-2:]: + first_col_suffix = Path(row[self._first_col]).suffixes[-2:] + second_col_suffix = Path(row[self._second_col]).suffixes[-2:] + if first_col_suffix != second_col_suffix: raise AssertionError("FASTQ pairs must have the same file extensions.") else: row[self._single_col] = True @@ -157,7 +159,7 @@ def sniff_format(handle): handle.seek(0) sniffer = csv.Sniffer() if not sniffer.has_header(peek): - logger.critical(f"The given sample sheet does not appear to contain a header.") + logger.critical("The given sample sheet does not appear to contain a header.") sys.exit(1) dialect = sniffer.sniff(peek) return dialect @@ -195,7 +197,8 @@ def check_samplesheet(file_in, file_out): reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) # Validate the existence of the expected header columns. if not required_columns.issubset(reader.fieldnames): - logger.critical(f"The sample sheet **must** contain the column headers: {', '.join(required_columns)}.") + req_cols = ", ".join(required_columns) + logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") sys.exit(1) # Validate each row. checker = RowChecker() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..0d62beb --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. 
+[tool.black] +line-length = 120 +target_version = ["py37", "py38", "py39", "py310"] + +[tool.isort] +profile = "black" +known_first_party = ["nf_core"] +multi_line_output = 3 From 152140d07574b8d299940a66cb219b84abe38b94 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Thu, 1 Sep 2022 17:45:57 +0200 Subject: [PATCH 05/78] update GRCh38 reference source --- bin/epaa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/epaa.py b/bin/epaa.py index c282009..0434447 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -1215,7 +1215,7 @@ def __main__(): metadata = [] proteins = [] - references = {"GRCh37": "http://feb2014.archive.ensembl.org", "GRCh38": "http://mar2017.archive.ensembl.org"} + references = {"GRCh37": "http://feb2014.archive.ensembl.org", "GRCh38": "http://aug2017.archive.ensembl.org"} global transcriptProteinMap global transcriptSwissProtMap From 2cb74a0020b4b6c9870df1cdacea5f305ea7316a Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Fri, 2 Sep 2022 11:00:57 +0200 Subject: [PATCH 06/78] Update CHANGELOG.md Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2638218..abeb554 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` -- [#176](https://github.com/nf-core/epitopeprediction/pull/176) - Update to nf-core template `2.5.1` +- [#177](https://github.com/nf-core/epitopeprediction/pull/177) - Update to nf-core template `2.5.1` ### `Fixed` From b82312905cd508f6c73db8a0d1681a3fb7676797 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Fri, 2 Sep 2022 11:04:31 +0200 Subject: [PATCH 07/78] Update README.md Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0405e62..da073dd 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/epitopeprediction) +[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7?labelColor=000000)](https://tower.nf/launch?pipeline=https://github.com/nf-core/epitopeprediction) [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23epitopeprediction-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/epitopeprediction) [![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core) From e83dabd035c3c19ff839f393a4525fee43c82538 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Fri, 2 Sep 2022 11:04:40 +0200 Subject: [PATCH 08/78] Update README.md Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index da073dd..aa432e3 
100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![GitHub Actions Linting Status](https://github.com/nf-core/epitopeprediction/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/epitopeprediction/actions?query=workflow%3A%22nf-core+linting%22) [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/epitopeprediction/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3564666-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3564666) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg?labelColor=000000)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) From 6565ee51f4f025160a9cfe104fe20175167e9e0b Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Wed, 7 Sep 2022 15:45:01 +0200 Subject: [PATCH 09/78] fix indentation --- bin/epaa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/epaa.py b/bin/epaa.py index 20de23e..a991aaa 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -11,7 +11,6 @@ import itertools import pandas as pd import numpy as np - import epytope.Core.Generator as generator import math import json @@ -467,6 +466,7 @@ def read_vcf(filename, pass_only=True): vs_new.log_metadata(m, v.get_metadata(m)) dict_vars[v] = vs_new + print(dict_vars) return dict_vars.values(), transcript_ids, final_metadata_list From 51bb5d27100cdaed9256ac1e785f27cd1f3a4fcd Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Tue, 13 Sep 2022 10:39:57 +0200 Subject: [PATCH 10/78] bug fixes and cleanup --- bin/epaa.py | 82 ++++++++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 39 deletions(-) diff --git a/bin/epaa.py b/bin/epaa.py index a991aaa..a5e5dbc 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -66,6 +66,7 @@ def get_epytope_annotation(vt, p, r, alt): return p, r, alt return position, reference, alternative + def check_min_req_GSvar(row): """ checking the presence of mandatory columns @@ -147,12 +148,14 @@ def read_GSvar(filename, pass_only=True): ) else False ) + # old GSvar version if "coding_and_splicing_details" in line: mut_type = line.get("variant_details", "") annots = RE.findall(line["coding_and_splicing_details"]) else: mut_type = line.get("variant_type", "") + # Gene, transcript number, type, impact, exon/intron number, HGVS.c, HGVS.p, Pfam annots = RE.findall(line["coding_and_splicing"]) isyn = mut_type == "synonymous_variant" @@ -176,20 +179,22 @@ def read_GSvar(filename, pass_only=True): coding = dict() for annot in annots: - a_gene, nm_id, a_mut_type, exon, trans_coding, trans_pos, prot_coding, prot_start = annot - if "NM" in nm_id: + a_gene, transcript_id, a_mut_type, exon, trans_coding, trans_pos, prot_coding, prot_start = annot + if "NM" in transcript_id: ID_SYSTEM_USED = EIdentifierTypes.REFSEQ if "stop_gained" not in mut_type: if not gene: gene = a_gene if not mut_type: mut_type = a_mut_type - nm_id = nm_id.split(".")[0] - coding[nm_id] = MutationSyntax( - nm_id, int(trans_pos.split("_")[0]) 
- 1, int(prot_start) - 1, trans_coding, prot_coding + #TODO with the next epytope release we can deal with transcript id version + transcript_id = transcript_id.split(".")[0] + + coding[transcript_id] = MutationSyntax( + transcript_id, int(trans_pos.split("_")[0]) - 1, int(prot_start) - 1, trans_coding, prot_coding ) - transcript_ids.append(nm_id) + transcript_ids.append(transcript_id) if coding: var = Variant( mut_id, @@ -213,7 +218,7 @@ def read_GSvar(filename, pass_only=True): transToVar = {} - # fix because of memory/timing issues due to combinatoric explosion + # fix because of memory/timing issues due to combinatorial explosion for v in list_vars: for trans_id in v.coding.keys(): transToVar.setdefault(trans_id, []).append(v) @@ -302,7 +307,7 @@ def read_vcf(filename, pass_only=True): vt = VariationType.SNP elif record.is_indel: #@TODO Potential bug here if v_list is really list - if len(v_list) % 3 == 0: # no frameshift + if len(alternative_list[0]) % 3 == 0: # no frameshift if record.is_deletion: vt = VariationType.DEL else: @@ -365,7 +370,6 @@ def read_vcf(filename, pass_only=True): gene = a_gene_id #TODO with the new epytope release we will support transcript IDs with version - # there are no isoforms in biomart transcript_id = transcript_id.split(".")[0] if 'NM' in transcript_id: ID_SYSTEM_USED = EIdentifierTypes.REFSEQ @@ -416,36 +420,36 @@ def read_vcf(filename, pass_only=True): coding[transcript_id] = MutationSyntax( transcript_id, tpos, ppos, split_coding_c[-1], split_coding_p[-1]) transcript_ids.append(transcript_id) - if coding: - pos, reference, alternative = get_epytope_annotation( - vt, genomic_position, reference, str(alt)) - var = Variant("line" + str(num), vt, chr, pos, reference, - alternative, coding, isHomozygous, isSynonymous) - var.gene = gene - var.log_metadata("vardbid", variation_dbid) - final_metadata_list.append("vardbid") - for metadata_name in metadata_list: - if metadata_name in record.INFO: - final_metadata_list.append(metadata_name) - var.log_metadata( - metadata_name, record.INFO[metadata_name]) - for sample in record.samples: - for format_key in format_list: - if getattr(sample.data, format_key, None) is None: - logger.warning("FORMAT entry {entry} not defined for {genotype}. Skipping.".format( - entry=format_key, genotype=sample.sample)) - continue - format_header = '{}.{}'.format( - sample.sample, format_key) - final_metadata_list.append(format_header) - if isinstance(sample[format_key], list): - format_value = ','.join( - [str(i) for i in sample[format_key]]) - else: - format_value = sample[format_key] - var.log_metadata(format_header, format_value) - dict_vars[var] = var - list_vars.append(var) + if coding: + pos, reference, alternative = get_epytope_annotation( + vt, genomic_position, reference, str(alt)) + var = Variant("line" + str(num), vt, chr, pos, reference, + alternative, coding, isHomozygous, isSynonymous) + var.gene = gene + var.log_metadata("vardbid", variation_dbid) + final_metadata_list.append("vardbid") + for metadata_name in metadata_list: + if metadata_name in record.INFO: + final_metadata_list.append(metadata_name) + var.log_metadata( + metadata_name, record.INFO[metadata_name]) + for sample in record.samples: + for format_key in format_list: + if getattr(sample.data, format_key, None) is None: + logger.warning("FORMAT entry {entry} not defined for {genotype}. 
Skipping.".format( + entry=format_key, genotype=sample.sample)) + continue + format_header = '{}.{}'.format( + sample.sample, format_key) + final_metadata_list.append(format_header) + if isinstance(sample[format_key], list): + format_value = ','.join( + [str(i) for i in sample[format_key]]) + else: + format_value = sample[format_key] + var.log_metadata(format_header, format_value) + dict_vars[var] = var + list_vars.append(var) else: logger.error("No supported variant annotation string found. Aborting.") sys.exit(1) From 430a65a99b0f691cfb507faf21f18aa8f155d24f Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Tue, 13 Sep 2022 11:04:23 +0200 Subject: [PATCH 11/78] update nf-core modules multiqc and dumpsoftwareversions --- modules.json | 4 ++-- .../modules/custom/dumpsoftwareversions/main.nf | 8 ++++---- .../templates/dumpsoftwareversions.py | 14 ++++++-------- modules/nf-core/modules/multiqc/main.nf | 9 +++++---- modules/nf-core/modules/multiqc/meta.yml | 7 +++++-- 5 files changed, 22 insertions(+), 20 deletions(-) diff --git a/modules.json b/modules.json index 12be9cd..65cbe3d 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "modules": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" + "git_sha": "5e7b1ef9a5a2d9258635bcbf70fcf37dacd1b247" }, "gunzip": { "branch": "master", @@ -15,7 +15,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "5138acca0985ca01c38a1c4fba917d83772b1106" + "git_sha": "5587389874dac9c9953a2ab6f01d49af81969492" } } } diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf index 12293ef..34b50b9 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf @@ -1,11 +1,11 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_low' + label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? "bioconda::multiqc=1.12" : null) + conda (params.enable_conda ? 'bioconda::multiqc=1.13a' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : + 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" input: path versions diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index 787bdb7..d139039 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -1,10 +1,9 @@ #!/usr/bin/env python +import yaml import platform from textwrap import dedent -import yaml - def _make_versions_html(versions): html = [ @@ -59,12 +58,11 @@ def _make_versions_html(versions): for process, process_versions in versions_by_process.items(): module = process.split(":")[-1] try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. 
" - ) + assert versions_by_module[module] == process_versions, ( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) except KeyError: versions_by_module[module] = process_versions diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf index 1e7d6af..d10dae6 100644 --- a/modules/nf-core/modules/multiqc/main.nf +++ b/modules/nf-core/modules/multiqc/main.nf @@ -1,14 +1,15 @@ process MULTIQC { label 'process_medium' - conda (params.enable_conda ? 'bioconda::multiqc=1.13a' : null) + conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : - 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" - tuple path(multiqc_config), path(multiqc_logo) + path(multiqc_config) + path(multiqc_logo) output: path "*multiqc_report.html", emit: report diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/modules/multiqc/meta.yml index bf3a27f..a1029f3 100644 --- a/modules/nf-core/modules/multiqc/meta.yml +++ b/modules/nf-core/modules/multiqc/meta.yml @@ -12,6 +12,7 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + input: - multiqc_files: type: file @@ -19,12 +20,13 @@ input: List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - multiqc_config: type: file - description: Config yml for MultiQC + description: Optional config yml for MultiQC pattern: "*.{yml,yaml}" - multiqc_logo: type: file - description: Logo file for MultiQC + description: Optional logo file for MultiQC pattern: "*.{png}" + output: - report: type: file @@ -46,3 +48,4 @@ authors: - "@abhi18av" - "@bunop" - "@drpatelh" + - "@jfy133" From 7084d11d4fe2c78b2e92315664c24051609c13c2 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Tue, 13 Sep 2022 12:48:53 +0200 Subject: [PATCH 12/78] fix multiqc call --- workflows/epitopeprediction.nf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/workflows/epitopeprediction.nf b/workflows/epitopeprediction.nf index 64c59aa..120fa8b 100644 --- a/workflows/epitopeprediction.nf +++ b/workflows/epitopeprediction.nf @@ -22,9 +22,10 @@ if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input sample ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = [file("$projectDir/assets/multiqc_config.yml", checkIfExists: true), file("$projectDir/assets/nf-core-epitopeprediction_logo_light.png", checkIfExists: true)] +ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath(params.multiqc_config) : Channel.empty() ch_multiqc_configs = Channel.from(ch_multiqc_config).mix(ch_multiqc_custom_config).ifEmpty([]) +ch_multiqc_logo = Channel.from(file("$projectDir/assets/nf-core-epitopeprediction_logo_light.png", checkIfExists: true)) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -449,7 +450,9 @@ workflow EPITOPEPREDICTION { ch_multiqc_files = ch_multiqc_files.mix( ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') ) MULTIQC ( - ch_multiqc_files.collect(), ch_multiqc_configs.collect() + ch_multiqc_files.collect(), + ch_multiqc_configs.collect(), + ch_multiqc_logo ) multiqc_report = MULTIQC.out.report.toList() } From a070561a1ba57d2d14966b47d66c26864ef38c33 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Tue, 13 Sep 2022 12:56:22 +0200 Subject: [PATCH 13/78] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index abeb554..0cb9485 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` - [#177](https://github.com/nf-core/epitopeprediction/pull/177) - Update to nf-core template `2.5.1` +- [#178](https://github.com/nf-core/epitopeprediction/pull/178) - Update MultiQC to `1.13` ### `Fixed` From 24b6b8ffed697a9529b30f0c46daa21ae99fe3bf Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Tue, 13 Sep 2022 13:02:10 +0200 Subject: [PATCH 14/78] run prettier --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cb9485..35658fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` - [#177](https://github.com/nf-core/epitopeprediction/pull/177) - Update to nf-core template `2.5.1` -- [#178](https://github.com/nf-core/epitopeprediction/pull/178) - Update MultiQC to `1.13` +- [#178](https://github.com/nf-core/epitopeprediction/pull/178) - Update MultiQC to `1.13` ### `Fixed` From 1775a0f76d9d7f7a3f33579b397890d8c2915d86 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Thu, 15 Sep 2022 11:59:55 +0200 Subject: [PATCH 15/78] cleanup --- bin/epaa.py | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/bin/epaa.py b/bin/epaa.py index a5e5dbc..8a04fa6 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -352,10 +352,11 @@ def read_vcf(filename, pass_only=True): continue obs, a_mut_type, impact, a_gene, a_gene_id, feature_type, transcript_id, exon, tot_exon, trans_coding, prot_coding, cdna, cds, aa, distance, warnings = annots types.append(a_mut_type) - tpos = 0 ppos = 0 positions = '' + isSynonymous = (a_mut_type == "synonymous_variant") + gene = a_gene_id # get cds/protein positions and convert mutation syntax to epytope format if trans_coding != '': @@ -366,9 +367,6 @@ def read_vcf(filename, pass_only=True): positions = re.findall(r'\d+', prot_coding) tpos = int(positions[0]) - 1 - isSynonymous = (a_mut_type == "synonymous_variant") - gene = a_gene_id - #TODO with the new epytope release we will support transcript IDs with version transcript_id = transcript_id.split(".")[0] if 'NM' in transcript_id: @@ -393,18 +391,6 @@ def read_vcf(filename, pass_only=True): cds_pos = split_annotation[vep_fields["cds_position"]] # not sure yet if this is always the case if cds_pos: - """ - https://varnomen.hgvs.org/recommendations/general/ - “c.” for a 
coding DNA reference sequence - “g.” for a linear genomic reference sequence - “m.” for a mitochondrial DNA reference sequence - “n.” for a non-coding DNA reference sequence - “o.” for a circular genomic reference sequence - “p.” for a protein reference sequence - “r.” for an RNA reference sequence (transcript) - - We could filter for coding and genomic here. - """ ppos = -1 prot_coding = "" split_coding_c = c_coding.split(':') @@ -453,10 +439,9 @@ def read_vcf(filename, pass_only=True): else: logger.error("No supported variant annotation string found. Aborting.") sys.exit(1) - transToVar = {} - # fix because of memory/timing issues due to combinatoric explosion + # fix because of memory/timing issues due to combinatorial explosion for v in list_vars: for trans_id in v.coding.keys(): transToVar.setdefault(trans_id, []).append(v) @@ -470,7 +455,6 @@ def read_vcf(filename, pass_only=True): vs_new.log_metadata(m, v.get_metadata(m)) dict_vars[v] = vs_new - print(dict_vars) return dict_vars.values(), transcript_ids, final_metadata_list From 7c07f648d5194ce9c0b7e648ec1e4ea9870c4ef3 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Thu, 15 Sep 2022 14:49:53 +0200 Subject: [PATCH 16/78] reformat using black --- bin/epaa.py | 132 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 89 insertions(+), 43 deletions(-) diff --git a/bin/epaa.py b/bin/epaa.py index 8a04fa6..85dfd6f 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -188,7 +188,7 @@ def read_GSvar(filename, pass_only=True): if not mut_type: mut_type = a_mut_type - #TODO with the next epytope release we can deal with transcript id version + # TODO with the next epytope release we can deal with transcript id version transcript_id = transcript_id.split(".")[0] coding[transcript_id] = MutationSyntax( @@ -246,7 +246,31 @@ def read_vcf(filename, pass_only=True): vep_header_available = False # default VEP fields - vep_fields = {"allele": 0,"consequence": 1,"impact": 2,"symbol": 3,"gene": 4,"feature_type": 5,"feature": 6,"biotype": 7,"exon": 8,"intron": 9,"hgvsc": 10,"hgvsp": 11,"cdna_position": 12,"cds_position": 13, "protein_position": 14,"amino_acids": 15,"codons": 16,"existing_variation": 17,"distance": 18,"strand": 19,"flags": 20,"symbol_source": 21,"hgnc_id":22} + vep_fields = { + "allele": 0, + "consequence": 1, + "impact": 2, + "symbol": 3, + "gene": 4, + "feature_type": 5, + "feature": 6, + "biotype": 7, + "exon": 8, + "intron": 9, + "hgvsc": 10, + "hgvsp": 11, + "cdna_position": 12, + "cds_position": 13, + "protein_position": 14, + "amino_acids": 15, + "codons": 16, + "existing_variation": 17, + "distance": 18, + "strand": 19, + "flags": 20, + "symbol_source": 21, + "hgnc_id": 22, + } VEP_KEY = "CSQ" SNPEFF_KEY = "ANN" @@ -265,7 +289,7 @@ def read_vcf(filename, pass_only=True): # determine format of given VEP annotation if VEP_KEY in vcf_reader.infos: split_vep_def = vcf_reader.infos[VEP_KEY] - for idx, field in enumerate(split_vep_def.desc.split()[-1].split('|')): + for idx, field in enumerate(split_vep_def.desc.split()[-1].split("|")): vep_fields[field.strip().lower()] = idx vep_header_available = True @@ -281,7 +305,7 @@ def read_vcf(filename, pass_only=True): genotye_dict = {"het": False, "hom": True, "ref": True} for num, record in enumerate(vl): - chr = record.CHROM.strip('chr') + chr = record.CHROM.strip("chr") genomic_position = record.POS variation_dbid = record.ID reference = str(record.REF) @@ -306,7 +330,7 @@ def read_vcf(filename, pass_only=True): if record.is_snp: vt = VariationType.SNP elif record.is_indel: 
- #@TODO Potential bug here if v_list is really list + # @TODO Potential bug here if v_list is really list if len(alternative_list[0]) % 3 == 0: # no frameshift if record.is_deletion: vt = VariationType.DEL @@ -345,47 +369,64 @@ def read_vcf(filename, pass_only=True): # SNPEFF annotation if SNPEFF_KEY in record.INFO: for annraw in record.INFO[SNPEFF_KEY]: - annots = annraw.split('|') + annots = annraw.split("|") if len(annots) != 16: logger.warning( - "read_vcf: Omitted row! Mandatory columns not present in annotation field (ANN). \n Have you annotated your VCF file with SnpEff?") + "read_vcf: Omitted row! Mandatory columns not present in annotation field (ANN). \n Have you annotated your VCF file with SnpEff?" + ) continue - obs, a_mut_type, impact, a_gene, a_gene_id, feature_type, transcript_id, exon, tot_exon, trans_coding, prot_coding, cdna, cds, aa, distance, warnings = annots + ( + obs, + a_mut_type, + impact, + a_gene, + a_gene_id, + feature_type, + transcript_id, + exon, + tot_exon, + trans_coding, + prot_coding, + cdna, + cds, + aa, + distance, + warnings, + ) = annots types.append(a_mut_type) tpos = 0 ppos = 0 - positions = '' - isSynonymous = (a_mut_type == "synonymous_variant") + positions = "" + isSynonymous = a_mut_type == "synonymous_variant" gene = a_gene_id # get cds/protein positions and convert mutation syntax to epytope format - if trans_coding != '': - positions = re.findall(r'\d+', trans_coding) + if trans_coding != "": + positions = re.findall(r"\d+", trans_coding) ppos = int(positions[0]) - 1 - if prot_coding != '': - positions = re.findall(r'\d+', prot_coding) + if prot_coding != "": + positions = re.findall(r"\d+", prot_coding) tpos = int(positions[0]) - 1 - #TODO with the new epytope release we will support transcript IDs with version + # TODO with the new epytope release we will support transcript IDs with version transcript_id = transcript_id.split(".")[0] - if 'NM' in transcript_id: + if "NM" in transcript_id: ID_SYSTEM_USED = EIdentifierTypes.REFSEQ # take online coding variants into account, epytope cannot deal with stop gain variants right now - if not prot_coding or 'stop_gained' in a_mut_type: + if not prot_coding or "stop_gained" in a_mut_type: continue - coding[transcript_id] = MutationSyntax( - transcript_id, ppos, tpos, trans_coding, prot_coding) + coding[transcript_id] = MutationSyntax(transcript_id, ppos, tpos, trans_coding, prot_coding) transcript_ids.append(transcript_id) else: if not vep_header_available: logger.warning("No CSQ definition found in header, trying to map to default VEP format string.") for annotation in record.INFO[VEP_KEY]: - split_annotation = annotation.split('|') - isSynonymous = 'synonymous' in split_annotation[vep_fields['consequence']] - gene = split_annotation[vep_fields['gene']] + split_annotation = annotation.split("|") + isSynonymous = "synonymous" in split_annotation[vep_fields["consequence"]] + gene = split_annotation[vep_fields["gene"]] c_coding = split_annotation[vep_fields["hgvsc"]] p_coding = split_annotation[vep_fields["hgvsp"]] cds_pos = split_annotation[vep_fields["cds_position"]] @@ -393,44 +434,50 @@ def read_vcf(filename, pass_only=True): if cds_pos: ppos = -1 prot_coding = "" - split_coding_c = c_coding.split(':') - split_coding_p = p_coding.split(':') + split_coding_c = c_coding.split(":") + split_coding_p = p_coding.split(":") # we still need the new functionality here in epytope to query with IDs with version (ENTxxx.x) - transcript_id = split_coding_c[0] if split_coding_c[0] else 
split_annotation[vep_fields["feature"]] - transcript_id = transcript_id.split('.')[0] + transcript_id = ( + split_coding_c[0] if split_coding_c[0] else split_annotation[vep_fields["feature"]] + ) + transcript_id = transcript_id.split(".")[0] - tpos = int(cds_pos.split('/')[0].split('-')[0]) - 1 + tpos = int(cds_pos.split("/")[0].split("-")[0]) - 1 if split_annotation[vep_fields["protein_position"]]: - ppos = int(split_annotation[vep_fields["protein_position"]].split('-')[0].split('/')[0]) - 1 + ppos = ( + int(split_annotation[vep_fields["protein_position"]].split("-")[0].split("/")[0]) + - 1 + ) coding[transcript_id] = MutationSyntax( - transcript_id, tpos, ppos, split_coding_c[-1], split_coding_p[-1]) + transcript_id, tpos, ppos, split_coding_c[-1], split_coding_p[-1] + ) transcript_ids.append(transcript_id) if coding: - pos, reference, alternative = get_epytope_annotation( - vt, genomic_position, reference, str(alt)) - var = Variant("line" + str(num), vt, chr, pos, reference, - alternative, coding, isHomozygous, isSynonymous) + pos, reference, alternative = get_epytope_annotation(vt, genomic_position, reference, str(alt)) + var = Variant( + "line" + str(num), vt, chr, pos, reference, alternative, coding, isHomozygous, isSynonymous + ) var.gene = gene var.log_metadata("vardbid", variation_dbid) final_metadata_list.append("vardbid") for metadata_name in metadata_list: if metadata_name in record.INFO: final_metadata_list.append(metadata_name) - var.log_metadata( - metadata_name, record.INFO[metadata_name]) + var.log_metadata(metadata_name, record.INFO[metadata_name]) for sample in record.samples: for format_key in format_list: if getattr(sample.data, format_key, None) is None: - logger.warning("FORMAT entry {entry} not defined for {genotype}. Skipping.".format( - entry=format_key, genotype=sample.sample)) + logger.warning( + "FORMAT entry {entry} not defined for {genotype}. 
Skipping.".format( + entry=format_key, genotype=sample.sample + ) + ) continue - format_header = '{}.{}'.format( - sample.sample, format_key) + format_header = "{}.{}".format(sample.sample, format_key) final_metadata_list.append(format_header) if isinstance(sample[format_key], list): - format_value = ','.join( - [str(i) for i in sample[format_key]]) + format_value = ",".join([str(i) for i in sample[format_key]]) else: format_value = sample[format_key] var.log_metadata(format_header, format_value) @@ -1249,8 +1296,7 @@ def __main__(): metadata = [] proteins = [] - references = {'GRCh37': 'http://feb2014.archive.ensembl.org', - 'GRCh38': 'http://aug2017.archive.ensembl.org'} + references = {"GRCh37": "http://feb2014.archive.ensembl.org", "GRCh38": "http://aug2017.archive.ensembl.org"} global transcriptProteinMap global transcriptSwissProtMap From 09e86551fe92909eebcda08b88122d991d00aacf Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Mon, 19 Sep 2022 16:04:09 +0200 Subject: [PATCH 17/78] outsource to methods --- bin/epaa.py | 84 +++++++++++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 38 deletions(-) diff --git a/bin/epaa.py b/bin/epaa.py index 85dfd6f..1dd9863 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -23,7 +23,6 @@ from epytope.IO.UniProtAdapter import UniProtDB from epytope.Core.Allele import Allele from epytope.Core.Peptide import Peptide -from epytope.IO import FileReader from Bio import SeqUtils from datetime import datetime from string import Template @@ -87,6 +86,44 @@ def check_min_req_GSvar(row): return False +def determine_variant_type(record, alternative): + vt = VariationType.UNKNOWN + if record.is_snp: + vt = VariationType.SNP + elif record.is_indel: + if len(alternative) % 3 == 0: # no frameshift + if record.is_deletion: + vt = VariationType.DEL + else: + vt = VariationType.INS + else: # frameshift + if record.is_deletion: + vt = VariationType.FSDEL + else: + vt = VariationType.FSINS + return vt + + +def determine_zygosity(record): + genotye_dict = {"het": False, "hom": True, "ref": True} + isHomozygous = False + if "HOM" in record.INFO: + isHomozygous = record.INFO["HOM"] == 1 + elif "SGT" in record.INFO: + zygosity = record.INFO["SGT"].split("->")[1] + if zygosity in genotye_dict: + isHomozygous = genotye_dict[zygosity] + else: + if zygosity[0] == zygosity[1]: + isHomozygous = True + else: + isHomozygous = False + else: + for sample in record.samples: + if "GT" in sample.data: + isHomozygous = sample.data["GT"] == "1/1" + return isHomozygous + def read_GSvar(filename, pass_only=True): """ reads GSvar and tsv files (tab sep files in context of genetic variants), omitting and warning about rows missing @@ -302,7 +339,6 @@ def read_vcf(filename, pass_only=True): dict_vars = {} list_vars = [] transcript_ids = [] - genotye_dict = {"het": False, "hom": True, "ref": True} for num, record in enumerate(vl): chr = record.CHROM.strip("chr") @@ -326,40 +362,9 @@ def read_vcf(filename, pass_only=True): DELETION => s = slice(pos, pos+len(ref)) (create slice that will be removed) del seq[s] (remove) """ - vt = VariationType.UNKNOWN - if record.is_snp: - vt = VariationType.SNP - elif record.is_indel: - # @TODO Potential bug here if v_list is really list - if len(alternative_list[0]) % 3 == 0: # no frameshift - if record.is_deletion: - vt = VariationType.DEL - else: - vt = VariationType.INS - else: # frameshift - if record.is_deletion: - vt = VariationType.FSDEL - else: - vt = VariationType.FSINS - gene = "" - for alt in alternative_list: - 
isHomozygous = False - if "HOM" in record.INFO: - isHomozygous = record.INFO["HOM"] == 1 - elif "SGT" in record.INFO: - zygosity = record.INFO["SGT"].split("->")[1] - if zygosity in genotye_dict: - isHomozygous = genotye_dict[zygosity] - else: - if zygosity[0] == zygosity[1]: - isHomozygous = True - else: - isHomozygous = False - else: - for sample in record.samples: - if "GT" in sample.data: - isHomozygous = sample.data["GT"] == "1/1" + isHomozygous = determine_zygosity(record) + vt = determine_variant_type(record, alt) # check if we have SNPEFF or VEP annotated variants, otherwise abort if record.INFO.get(SNPEFF_KEY, False) or record.INFO.get(VEP_KEY, False): @@ -527,9 +532,9 @@ def read_peptide_input(filename): return peptides, metadata -# parse protein_groups of MaxQuant output to get protein intensitiy values +# parse protein_groups of MaxQuant output to get protein intensity values def read_protein_quant(filename): - # protein id: sample1: intensity, sample2: instensity: + # protein id: sample1: intensity, sample2: intensity: intensities = {} with open(filename, "r") as inp: @@ -1400,7 +1405,6 @@ def __main__(): try: complete_df = pd.concat(pred_dataframes, sort=True) # replace method names with method names with version - # complete_df.replace({'method': methods}, inplace=True) complete_df["method"] = complete_df["method"].apply(lambda x: x.lower() + "-" + methods[x.lower()]) predictions_available = True except: @@ -1410,6 +1414,9 @@ def __main__(): # include wild type sequences to dataframe if specified if args.wild_type: + if args.peptides: + logger.warning("Wildtype sequence generation not available with peptide input.") + pass wt_sequences = generate_wt_seqs(all_peptides_filtered) complete_df["wt sequence"] = complete_df.apply( lambda row: create_wt_seq_column_value(row, wt_sequences), axis=1 @@ -1439,6 +1446,7 @@ def __main__(): "variant type", "method", ] + for c in complete_df.columns: if c not in columns_tiles: columns_tiles.append(c) From bb201ba525c99685eef45a35450faa2d665b392d Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Thu, 22 Sep 2022 15:50:16 +0200 Subject: [PATCH 18/78] optimize variant metadata retrieval, formatting --- bin/epaa.py | 193 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 110 insertions(+), 83 deletions(-) diff --git a/bin/epaa.py b/bin/epaa.py index 1dd9863..f06c84b 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -15,7 +15,6 @@ import math import json -from collections import defaultdict from epytope.IO.MartsAdapter import MartsAdapter from epytope.Core.Variant import Variant, VariationType, MutationSyntax from epytope.EpitopePrediction import EpitopePredictorFactory @@ -25,7 +24,6 @@ from epytope.Core.Peptide import Peptide from Bio import SeqUtils from datetime import datetime -from string import Template __author__ = "Christopher Mohr" VERSION = "1.1" @@ -91,12 +89,12 @@ def determine_variant_type(record, alternative): if record.is_snp: vt = VariationType.SNP elif record.is_indel: - if len(alternative) % 3 == 0: # no frameshift + if len(alternative) % 3 == 0: # no frameshift if record.is_deletion: vt = VariationType.DEL else: vt = VariationType.INS - else: # frameshift + else: # frameshift if record.is_deletion: vt = VariationType.FSDEL else: @@ -124,6 +122,7 @@ def determine_zygosity(record): isHomozygous = sample.data["GT"] == "1/1" return isHomozygous + def read_GSvar(filename, pass_only=True): """ reads GSvar and tsv files (tab sep files in context of genetic variants), omitting and warning about rows missing @@ 
-277,6 +276,7 @@ def read_vcf(filename, pass_only=True): reads vcf files returns a list of epytope variants :param filename: /path/to/file + :param boolean pass_only: only consider variants that passed the filter (default: True) :return: list of epytope variants """ global ID_SYSTEM_USED @@ -341,7 +341,7 @@ def read_vcf(filename, pass_only=True): transcript_ids = [] for num, record in enumerate(vl): - chr = record.CHROM.strip("chr") + chromosome = record.CHROM.strip("chr") genomic_position = record.POS variation_dbid = record.ID reference = str(record.REF) @@ -461,7 +461,15 @@ def read_vcf(filename, pass_only=True): if coding: pos, reference, alternative = get_epytope_annotation(vt, genomic_position, reference, str(alt)) var = Variant( - "line" + str(num), vt, chr, pos, reference, alternative, coding, isHomozygous, isSynonymous + "line" + str(num), + vt, + chromosome, + pos, + reference, + alternative, + coding, + isHomozygous, + isSynonymous, ) var.gene = gene var.log_metadata("vardbid", variation_dbid) @@ -494,6 +502,7 @@ def read_vcf(filename, pass_only=True): transToVar = {} # fix because of memory/timing issues due to combinatorial explosion + for v in list_vars: for trans_id in v.coding.keys(): transToVar.setdefault(trans_id, []).append(v) @@ -590,31 +599,31 @@ def create_transcript_column_value(pep): return ",".join(set([x.transcript_id.split(":")[0] for x in set(pep.get_all_transcripts())])) -def create_mutationsyntax_column_value(pep): - transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - variants = [] +def create_mutationsyntax_column_value(pep, pep_dictionary): + # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] + # variants = [] syntaxes = [] - for t in transcript_ids: - variants.extend([v for v in pep.get_variants_by_protein(t)]) - transcript_ids = set([t.split(":")[0] for t in transcript_ids]) - for v in set(variants): + # for t in transcript_ids: + # variants.extend([v for v in pep.get_variants_by_protein(t)]) + # transcript_ids = set([t.split(":")[0] for t in transcript_ids]) + for v in set(pep_dictionary[pep]): for c in v.coding: - if c in transcript_ids: - syntaxes.append(v.coding[c]) + # if c in transcript_ids: + syntaxes.append(v.coding[c]) return ",".join(set([y.aaMutationSyntax for y in syntaxes])) -def create_mutationsyntax_genome_column_value(pep): - transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - variants = [] +def create_mutationsyntax_genome_column_value(pep, pep_dictionary): + # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] + # variants = [] syntaxes = [] - for t in transcript_ids: - variants.extend([v for v in pep.get_variants_by_protein(t)]) - transcript_ids = set([t.split(":")[0] for t in transcript_ids]) - for v in set(variants): + # for t in transcript_ids: + # variants.extend([v for v in pep.get_variants_by_protein(t)]) + # transcript_ids = set([t.split(":")[0] for t in transcript_ids]) + for v in set(pep_dictionary[pep]): for c in v.coding: - if c in transcript_ids: - syntaxes.append(v.coding[c]) + # if c in transcript_ids: + syntaxes.append(v.coding[c]) return ",".join(set([y.cdsMutationSyntax for y in syntaxes])) @@ -624,70 +633,70 @@ def create_variationfilelinenumber_column_value(pep): return ",".join([str(int(y.id.replace("line", "")) + 1) for y in vf]) -def create_gene_column_value(pep): - transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - variants = [] - for t in transcript_ids: - variants.extend([v for v in 
pep.get_variants_by_protein(t)]) - return ",".join(set([y.gene for y in set(variants)])) +def create_gene_column_value(pep, pep_dictionary): + # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] + # variants = [] + # for t in transcript_ids: + # variants.extend([v for v in pep.get_variants_by_protein(t)]) + return ",".join(set([y.gene for y in set(pep_dictionary[pep])])) -def create_variant_pos_column_value(pep): - transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - variants = [] - for t in transcript_ids: - variants.extend([v for v in pep.get_variants_by_protein(t)]) - return ",".join(set(["{}".format(y.genomePos) for y in set(variants)])) +def create_variant_pos_column_value(pep, pep_dictionary): + # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] + # variants = [] + # for t in transcript_ids: + # variants.extend([v for v in pep.get_variants_by_protein(t)]) + return ",".join(set(["{}".format(y.genomePos) for y in set(pep_dictionary[pep])])) -def create_variant_chr_column_value(pep): - transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - variants = [] - for t in transcript_ids: - variants.extend([v for v in pep.get_variants_by_protein(t)]) - return ",".join(set(["{}".format(y.chrom) for y in set(variants)])) +def create_variant_chr_column_value(pep, pep_dictionary): + # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] + # variants = [] + # for t in transcript_ids: + # variants.extend([v for v in pep.get_variants_by_protein(t)]) + return ",".join(set(["{}".format(y.chrom) for y in set(pep_dictionary[pep])])) -def create_variant_type_column_value(pep): +def create_variant_type_column_value(pep, pep_dictionary): types = {0: "SNP", 1: "DEL", 2: "INS", 3: "FSDEL", 4: "FSINS", 5: "UNKNOWN"} - transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - variants = [] - for t in transcript_ids: - variants.extend([v for v in pep.get_variants_by_protein(t)]) - return ",".join(set([types[y.type] for y in set(variants)])) + # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] + # variants = [] + # for t in transcript_ids: + # variants.extend([v for v in pep.get_variants_by_protein(t)]) + return ",".join(set([types[y.type] for y in set(pep_dictionary[pep])])) -def create_variant_syn_column_value(pep): - transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - variants = [] - for t in transcript_ids: - variants.extend([v for v in pep.get_variants_by_protein(t)]) - return ",".join(set([str(y.isSynonymous) for y in set(variants)])) +def create_variant_syn_column_value(pep, pep_dictionary): + # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] + # variants = [] + # for t in transcript_ids: + # variants.extend([v for v in pep.get_variants_by_protein(t)]) + return ",".join(set([str(y.isSynonymous) for y in set(pep_dictionary[pep])])) -def create_variant_hom_column_value(pep): - transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - variants = [] - for t in transcript_ids: - variants.extend([v for v in pep.get_variants_by_protein(t)]) - return ",".join(set([str(y.isHomozygous) for y in set(variants)])) +def create_variant_hom_column_value(pep, pep_dictionary): + # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] + # variants = [] + # for t in transcript_ids: + # variants.extend([v for v in pep.get_variants_by_protein(t)]) + return 
",".join(set([str(y.isHomozygous) for y in set(pep_dictionary[pep])])) -def create_coding_column_value(pep): - transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - variants = [] - for t in transcript_ids: - variants.extend([v for v in pep.get_variants_by_protein(t)]) - return ",".join(set([str(y.coding) for y in set(variants)])) +def create_coding_column_value(pep, pep_dictionary): + # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] + # variants = [] + # for t in transcript_ids: + # variants.extend([v for v in pep.get_variants_by_protein(t)]) + return ",".join(set([str(y.coding) for y in set(pep_dictionary[pep])])) -def create_metadata_column_value(pep, c): - transcript_ids = [x.transcript_id for x in set(pep[0].get_all_transcripts())] - variants = [] - for t in transcript_ids: - variants.extend([v for v in pep[0].get_variants_by_protein(t)]) - meta = set([str(y.get_metadata(c)[0]) for y in set(variants) if len(y.get_metadata(c)) != 0]) +def create_metadata_column_value(pep, c, pep_dictionary): + # transcript_ids = [x.transcript_id for x in set(pep[0].get_all_transcripts())] + # variants = [] + # for t in transcript_ids: + # variants.extend([v for v in pep[0].get_variants_by_protein(t)]) + meta = set([str(y.get_metadata(c)[0]) for y in set(pep_dictionary[pep[0]]) if len(y.get_metadata(c)) != 0]) if len(meta) is 0: return np.nan else: @@ -961,6 +970,17 @@ def generate_wt_seqs(peptides): return wt_dict +def create_peptide_variant_dictionary(peptides): + pep_to_variants = {} + for pep in peptides: + transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] + variants = [] + for t in transcript_ids: + variants.extend([v for v in pep.get_variants_by_protein(t)]) + pep_to_variants[pep] = variants + return pep_to_variants + + def make_predictions_from_variants( variants_all, methods, @@ -1046,17 +1066,23 @@ def make_predictions_from_variants( allele_string_map["%s_%s" % (a, peplen)] = "%s_%i" % (conv_allele, peplen) max_values_matrices["%s_%i" % (conv_allele, peplen)] = get_matrix_max_score(conv_allele, peplen) + pep_to_variants = create_peptide_variant_dictionary(df["sequence"].tolist()) + df["length"] = df["sequence"].map(len) - df["chr"] = df["sequence"].map(create_variant_chr_column_value) - df["pos"] = df["sequence"].map(create_variant_pos_column_value) - df["gene"] = df["sequence"].map(create_gene_column_value) + df["chr"] = df["sequence"].map(lambda x: create_variant_chr_column_value(x, pep_to_variants)) + df["pos"] = df["sequence"].map(lambda x: create_variant_pos_column_value(x, pep_to_variants)) + df["gene"] = df["sequence"].map(lambda x: create_gene_column_value(x, pep_to_variants)) df["transcripts"] = df["sequence"].map(create_transcript_column_value) df["proteins"] = df["sequence"].map(create_protein_column_value) - df["variant type"] = df["sequence"].map(create_variant_type_column_value) - df["synonymous"] = df["sequence"].map(create_variant_syn_column_value) - df["homozygous"] = df["sequence"].map(create_variant_hom_column_value) - df["variant details (genomic)"] = df["sequence"].map(create_mutationsyntax_genome_column_value) - df["variant details (protein)"] = df["sequence"].map(create_mutationsyntax_column_value) + df["variant type"] = df["sequence"].map(lambda x: create_variant_type_column_value(x, pep_to_variants)) + df["synonymous"] = df["sequence"].map(lambda x: create_variant_syn_column_value(x, pep_to_variants)) + df["homozygous"] = df["sequence"].map(lambda x: create_variant_hom_column_value(x, 
pep_to_variants)) + df["variant details (genomic)"] = df["sequence"].map( + lambda x: create_mutationsyntax_genome_column_value(x, pep_to_variants) + ) + df["variant details (protein)"] = df["sequence"].map( + lambda x: create_mutationsyntax_column_value(x, pep_to_variants) + ) for c in df.columns: if ("HLA-" in str(c) or "H-2-" in str(c)) and "Score" in str(c): @@ -1087,8 +1113,9 @@ def make_predictions_from_variants( df.columns = df.columns.str.replace("Score", "score") df.columns = df.columns.str.replace("Rank", "rank") - for c in metadata: - df[c] = df.apply(lambda row: create_metadata_column_value(row, c), axis=1) + for c in set(metadata): + df[c] = df.apply(lambda row: create_metadata_column_value(row, c, pep_to_variants), axis=1) + pred_dataframes.append(df) statistics = { @@ -1306,7 +1333,7 @@ def __main__(): global transcriptProteinMap global transcriptSwissProtMap - """read in variants or peptides""" + # read in variants or peptides if args.peptides: peptides, metadata = read_peptide_input(args.peptides) else: From 5bd3d0f375dbc9816db13050f5c3ea00f141e7ee Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Thu, 22 Sep 2022 15:52:07 +0200 Subject: [PATCH 19/78] remove commented out code --- bin/epaa.py | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) diff --git a/bin/epaa.py b/bin/epaa.py index f06c84b..a4c4e4d 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -600,29 +600,17 @@ def create_transcript_column_value(pep): def create_mutationsyntax_column_value(pep, pep_dictionary): - # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - # variants = [] syntaxes = [] - # for t in transcript_ids: - # variants.extend([v for v in pep.get_variants_by_protein(t)]) - # transcript_ids = set([t.split(":")[0] for t in transcript_ids]) for v in set(pep_dictionary[pep]): for c in v.coding: - # if c in transcript_ids: syntaxes.append(v.coding[c]) return ",".join(set([y.aaMutationSyntax for y in syntaxes])) def create_mutationsyntax_genome_column_value(pep, pep_dictionary): - # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - # variants = [] syntaxes = [] - # for t in transcript_ids: - # variants.extend([v for v in pep.get_variants_by_protein(t)]) - # transcript_ids = set([t.split(":")[0] for t in transcript_ids]) for v in set(pep_dictionary[pep]): for c in v.coding: - # if c in transcript_ids: syntaxes.append(v.coding[c]) return ",".join(set([y.cdsMutationSyntax for y in syntaxes])) @@ -634,68 +622,35 @@ def create_variationfilelinenumber_column_value(pep): def create_gene_column_value(pep, pep_dictionary): - # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - # variants = [] - # for t in transcript_ids: - # variants.extend([v for v in pep.get_variants_by_protein(t)]) return ",".join(set([y.gene for y in set(pep_dictionary[pep])])) def create_variant_pos_column_value(pep, pep_dictionary): - # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - # variants = [] - # for t in transcript_ids: - # variants.extend([v for v in pep.get_variants_by_protein(t)]) return ",".join(set(["{}".format(y.genomePos) for y in set(pep_dictionary[pep])])) def create_variant_chr_column_value(pep, pep_dictionary): - # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - # variants = [] - # for t in transcript_ids: - # variants.extend([v for v in pep.get_variants_by_protein(t)]) return ",".join(set(["{}".format(y.chrom) for y in set(pep_dictionary[pep])])) def 
create_variant_type_column_value(pep, pep_dictionary): types = {0: "SNP", 1: "DEL", 2: "INS", 3: "FSDEL", 4: "FSINS", 5: "UNKNOWN"} - - # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - # variants = [] - # for t in transcript_ids: - # variants.extend([v for v in pep.get_variants_by_protein(t)]) return ",".join(set([types[y.type] for y in set(pep_dictionary[pep])])) def create_variant_syn_column_value(pep, pep_dictionary): - # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - # variants = [] - # for t in transcript_ids: - # variants.extend([v for v in pep.get_variants_by_protein(t)]) return ",".join(set([str(y.isSynonymous) for y in set(pep_dictionary[pep])])) def create_variant_hom_column_value(pep, pep_dictionary): - # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - # variants = [] - # for t in transcript_ids: - # variants.extend([v for v in pep.get_variants_by_protein(t)]) return ",".join(set([str(y.isHomozygous) for y in set(pep_dictionary[pep])])) def create_coding_column_value(pep, pep_dictionary): - # transcript_ids = [x.transcript_id for x in set(pep.get_all_transcripts())] - # variants = [] - # for t in transcript_ids: - # variants.extend([v for v in pep.get_variants_by_protein(t)]) return ",".join(set([str(y.coding) for y in set(pep_dictionary[pep])])) def create_metadata_column_value(pep, c, pep_dictionary): - # transcript_ids = [x.transcript_id for x in set(pep[0].get_all_transcripts())] - # variants = [] - # for t in transcript_ids: - # variants.extend([v for v in pep[0].get_variants_by_protein(t)]) meta = set([str(y.get_metadata(c)[0]) for y in set(pep_dictionary[pep[0]]) if len(y.get_metadata(c)) != 0]) if len(meta) is 0: return np.nan @@ -730,8 +685,6 @@ def create_quant_column_value(row, dict): # L = exon length in base-pairs for a gene # C = Number of reads mapped to a gene in a single sample # N = total (unique)mapped reads in the sample - - def create_expression_column_value_for_result(row, dict, deseq, gene_id_lengths): ts = row["gene"].split(",") values = [] From 7a452f1aff5298b89c9f2ee9175c71932a844db0 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Thu, 22 Sep 2022 15:56:00 +0200 Subject: [PATCH 20/78] remove unused function --- bin/epaa.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/bin/epaa.py b/bin/epaa.py index a4c4e4d..6e8cef0 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -615,12 +615,6 @@ def create_mutationsyntax_genome_column_value(pep, pep_dictionary): return ",".join(set([y.cdsMutationSyntax for y in syntaxes])) -def create_variationfilelinenumber_column_value(pep): - v = [x.vars.values() for x in pep.get_all_transcripts()] - vf = list(itertools.chain.from_iterable(v)) - return ",".join([str(int(y.id.replace("line", "")) + 1) for y in vf]) - - def create_gene_column_value(pep, pep_dictionary): return ",".join(set([y.gene for y in set(pep_dictionary[pep])])) From 416569a2d48c480243fcd1a7b32bb957472e06ad Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Fri, 23 Sep 2022 11:28:42 +0200 Subject: [PATCH 21/78] fix wild type sequence generation --- bin/epaa.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/epaa.py b/bin/epaa.py index 6e8cef0..b21dd3a 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -653,12 +653,12 @@ def create_metadata_column_value(pep, c, pep_dictionary): def create_wt_seq_column_value(pep, wtseqs): - transcripts = [x for x in set(pep.get_all_transcripts())] + transcripts = [x for x in 
set(pep['sequence'].get_all_transcripts())] wt = set( [ - str(wtseqs["{}_{}".format(str(pep), t.transcript_id)]) + str(wtseqs["{}_{}".format(str(pep['sequence']), t.transcript_id)]) for t in transcripts - if bool(t.vars) and "{}_{}".format(str(pep), t.transcript_id) in wtseqs + if bool(t.vars) and "{}_{}".format(str(pep['sequence']), t.transcript_id) in wtseqs ] ) if len(wt) is 0: @@ -996,8 +996,8 @@ def make_predictions_from_variants( df = results[0] else: continue - df = pd.concat(results) + # create method index and remove it from multi-column df = df.stack(level=1) From e8d39457e0c2cc350bc237411cf68876ebf5c45f Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Fri, 23 Sep 2022 11:36:14 +0200 Subject: [PATCH 22/78] fix output channel optional syntax --- modules/local/epytope_peptide_prediction.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/epytope_peptide_prediction.nf b/modules/local/epytope_peptide_prediction.nf index b6a7eee..fc536fd 100644 --- a/modules/local/epytope_peptide_prediction.nf +++ b/modules/local/epytope_peptide_prediction.nf @@ -12,8 +12,8 @@ process EPYTOPE_PEPTIDE_PREDICTION { output: tuple val(meta), path("*.json"), emit: json - tuple val(meta), path("*.tsv"), emit: predicted optional true - tuple val(meta), path("*.fasta"), emit: fasta optional true + tuple val(meta), path("*.tsv"), optional: true, emit: predicted + tuple val(meta), path("*.fasta"), optional: true, emit: fasta path "versions.yml", emit: versions script: From dedec66aa9062001eaaf90b9a133bcefcf6062bd Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Sun, 25 Sep 2022 15:51:02 +0200 Subject: [PATCH 23/78] improve way to determine variant-derived peptides --- bin/epaa.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/bin/epaa.py b/bin/epaa.py index b21dd3a..4e38437 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import os +from symbol import testlist import sys import logging import csv @@ -927,6 +928,15 @@ def create_peptide_variant_dictionary(peptides): pep_to_variants[pep] = variants return pep_to_variants +def is_created_by_variant(peptide): + transcript_ids = [x.transcript_id for x in set(peptide.get_all_transcripts())] + for t in transcript_ids: + prot = peptide.proteins[t] + for start_pos in peptide.proteinPos[t]: + for i in range(start_pos, start_pos+len(peptide)): + if i in prot.vars.keys(): + return True + return False def make_predictions_from_variants( variants_all, @@ -963,10 +973,10 @@ def make_predictions_from_variants( for peplen in range(minlength, maxlength): peptide_gen = generator.generate_peptides_from_proteins(prots, peplen) - peptides_var = [x for x in peptide_gen] + logger.info("Generated peptides at " + str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))) - # remove peptides which are not 'variant relevant' - peptides = [x for x in peptides_var if any(x.get_variants_by_protein(y) for y in x.proteins.keys())] + peptides_var = [x for x in peptide_gen] + peptides = [p for p in peptides_var if is_created_by_variant(p)] # filter out self peptides selfies = [str(p) for p in peptides if protein_db.exists(str(p))] From 67867868a4029b7da4423657ba4341241afd1e58 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Sun, 25 Sep 2022 15:51:38 +0200 Subject: [PATCH 24/78] reformat using black --- bin/epaa.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/bin/epaa.py b/bin/epaa.py index 4e38437..43f3e4a 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ 
-654,12 +654,12 @@ def create_metadata_column_value(pep, c, pep_dictionary): def create_wt_seq_column_value(pep, wtseqs): - transcripts = [x for x in set(pep['sequence'].get_all_transcripts())] + transcripts = [x for x in set(pep["sequence"].get_all_transcripts())] wt = set( [ - str(wtseqs["{}_{}".format(str(pep['sequence']), t.transcript_id)]) + str(wtseqs["{}_{}".format(str(pep["sequence"]), t.transcript_id)]) for t in transcripts - if bool(t.vars) and "{}_{}".format(str(pep['sequence']), t.transcript_id) in wtseqs + if bool(t.vars) and "{}_{}".format(str(pep["sequence"]), t.transcript_id) in wtseqs ] ) if len(wt) is 0: @@ -928,16 +928,18 @@ def create_peptide_variant_dictionary(peptides): pep_to_variants[pep] = variants return pep_to_variants + def is_created_by_variant(peptide): transcript_ids = [x.transcript_id for x in set(peptide.get_all_transcripts())] for t in transcript_ids: prot = peptide.proteins[t] for start_pos in peptide.proteinPos[t]: - for i in range(start_pos, start_pos+len(peptide)): + for i in range(start_pos, start_pos + len(peptide)): if i in prot.vars.keys(): return True return False + def make_predictions_from_variants( variants_all, methods, From 902e7ee4b13f093f197cccf2f92810a38d04e5bf Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Tue, 27 Sep 2022 17:14:35 +0200 Subject: [PATCH 25/78] fix frameshift check and peptide/var check --- bin/epaa.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/bin/epaa.py b/bin/epaa.py index 43f3e4a..7bb8f13 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -1,7 +1,6 @@ #!/usr/bin/env python import os -from symbol import testlist import sys import logging import csv @@ -90,7 +89,7 @@ def determine_variant_type(record, alternative): if record.is_snp: vt = VariationType.SNP elif record.is_indel: - if len(alternative) % 3 == 0: # no frameshift + if abs(len(alternative) - len(record.REF)) % 3 == 0: # no frameshift if record.is_deletion: vt = VariationType.DEL else: @@ -932,11 +931,18 @@ def create_peptide_variant_dictionary(peptides): def is_created_by_variant(peptide): transcript_ids = [x.transcript_id for x in set(peptide.get_all_transcripts())] for t in transcript_ids: - prot = peptide.proteins[t] - for start_pos in peptide.proteinPos[t]: - for i in range(start_pos, start_pos + len(peptide)): - if i in prot.vars.keys(): - return True + p = peptide.proteins[t] + varmap = p.vars + for pos, vars in varmap.items(): + for var in vars: + if var.type in [VariationType.FSDEL, VariationType.FSINS]: + if peptide.proteinPos[t][0] + len(peptide) > pos: + return True + else: + for start_pos in peptide.proteinPos[t]: + positions = list(range(start_pos, start_pos+len(peptide))) + if pos in positions: + return True return False @@ -975,7 +981,6 @@ def make_predictions_from_variants( for peplen in range(minlength, maxlength): peptide_gen = generator.generate_peptides_from_proteins(prots, peplen) - logger.info("Generated peptides at " + str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))) peptides_var = [x for x in peptide_gen] peptides = [p for p in peptides_var if is_created_by_variant(p)] From 96adecd02412b3ecd5b39a145c5bb2f726965b94 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 4 Oct 2022 21:52:57 +0000 Subject: [PATCH 26/78] Template update for nf-core/tools version 2.6 --- .github/workflows/awsfulltest.yml | 4 ++ .github/workflows/awstest.yml | 4 ++ .prettierignore | 1 + CITATION.cff | 8 +-- assets/adaptivecard.json | 67 +++++++++++++++++++ assets/methods_description_template.yml | 25 +++++++ 
assets/multiqc_config.yml | 6 +- docs/usage.md | 8 +++ lib/NfcoreTemplate.groovy | 55 +++++++++++++++ lib/Utils.groovy | 21 ++++-- lib/WorkflowEpitopeprediction.groovy | 19 ++++++ main.nf | 3 +- modules.json | 27 ++++---- .../custom/dumpsoftwareversions/main.nf | 8 +-- .../custom/dumpsoftwareversions/meta.yml | 0 .../templates/dumpsoftwareversions.py | 0 modules/nf-core/{modules => }/fastqc/main.nf | 12 ++++ modules/nf-core/{modules => }/fastqc/meta.yml | 0 modules/nf-core/modules/multiqc/main.nf | 31 --------- modules/nf-core/multiqc/main.nf | 53 +++++++++++++++ .../nf-core/{modules => }/multiqc/meta.yml | 15 +++++ nextflow.config | 5 +- nextflow_schema.json | 18 +++++ workflows/epitopeprediction.nf | 26 ++++--- 24 files changed, 345 insertions(+), 71 deletions(-) create mode 100644 assets/adaptivecard.json create mode 100644 assets/methods_description_template.yml mode change 100755 => 100644 lib/Utils.groovy rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/main.nf (79%) rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/meta.yml (100%) rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py (100%) rename modules/nf-core/{modules => }/fastqc/main.nf (85%) rename modules/nf-core/{modules => }/fastqc/meta.yml (100%) delete mode 100644 modules/nf-core/modules/multiqc/main.nf create mode 100644 modules/nf-core/multiqc/main.nf rename modules/nf-core/{modules => }/multiqc/meta.yml (73%) diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 05df25c..971fc48 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -28,3 +28,7 @@ jobs: "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/epitopeprediction/results-${{ github.sha }}" } profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: tower_action_*.log diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 2a4b889..3d1d8b9 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -23,3 +23,7 @@ jobs: "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/epitopeprediction/results-test-${{ github.sha }}" } profiles: test,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: tower_action_*.log diff --git a/.prettierignore b/.prettierignore index d0e7ae5..eb74a57 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,4 +1,5 @@ email_template.html +adaptivecard.json .nextflow* work/ data/ diff --git a/CITATION.cff b/CITATION.cff index 4533e2f..017666c 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -13,8 +13,8 @@ authors: given-names: Johannes - family-names: Wilm given-names: Andreas - - family-names: Ulysse Garcia - given-names: Maxime + - family-names: Garcia + given-names: Maxime Ulysse - family-names: Di Tommaso given-names: Paolo - family-names: Nahnsen @@ -39,8 +39,8 @@ prefered-citation: given-names: Johannes - family-names: Wilm given-names: Andreas - - family-names: Ulysse Garcia - given-names: Maxime + - family-names: Garcia + given-names: Maxime Ulysse - family-names: Di Tommaso given-names: Paolo - family-names: Nahnsen diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 0000000..e6b1cee --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": 
"http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/epitopeprediction v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 0000000..1c54d0f --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,25 @@ +id: "nf-core-epitopeprediction-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/epitopeprediction Methods Description" +section_href: "https://github.com/nf-core/epitopeprediction" +plot_type: "html" +## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline +## You inject any metadata in the Nextflow '${workflow}' object +data: | +

  <h4>Methods</h4>
+  <p>Data was processed using nf-core/epitopeprediction v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).</p>
+  <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:</p>
+  <pre><code>${workflow.commandLine}</code></pre>
+  <h4>References</h4>
+  <ul>
+    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820</li>
+    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x</li>
+  </ul>
+  <div class="alert alert-info">
+    <h5>Notes:</h5>
+    <ul>
+      ${nodoi_text}
+      <li>The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!</li>
+      <li>You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.</li>
+    </ul>
+  </div>
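The `data:` block above is not static HTML: it is a Groovy GString template, and the `${...}` placeholders are substituted at runtime before MultiQC embeds the result. The `methodsDescriptionText` helper added to `lib/WorkflowEpitopeprediction.groovy` later in this series performs that substitution with `SimpleTemplateEngine`. As a minimal standalone sketch of the substitution step, with invented placeholder values for `meta` rather than the pipeline's real workflow metadata:

    import groovy.text.SimpleTemplateEngine

    // Invented stand-ins for the metadata map the pipeline builds at runtime
    def meta = [
        workflow  : [manifest: [version: '2.2.0dev'],
                     commandLine: 'nextflow run nf-core/epitopeprediction -profile test,docker --outdir results'],
        doi_text  : '',
        nodoi_text: '<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>'
    ]

    // Render one line of the template; ${...} is resolved against the meta map
    def template = 'Data was processed using nf-core/epitopeprediction v${workflow.manifest.version} ${doi_text}'
    def html = new SimpleTemplateEngine().createTemplate(template).make(meta).toString()

    assert html.startsWith('Data was processed using nf-core/epitopeprediction v2.2.0dev')

The rendered HTML string is what ends up in the MultiQC report section declared by this YAML file.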
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index ebd0854..fd3eb90 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -3,9 +3,11 @@ report_comment: > analysis pipeline. For information about how to interpret these results, please see the documentation. report_section_order: - software_versions: + "nf-core-epitopeprediction-methods-description": order: -1000 - "nf-core-epitopeprediction-summary": + software_versions: order: -1001 + "nf-core-epitopeprediction-summary": + order: -1002 export_plots: true diff --git a/docs/usage.md b/docs/usage.md index 9ad2b37..e861c4f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -237,6 +237,14 @@ See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). +## Azure Resource Requests + +To be used with the `azurebatch` profile by specifying the `-profile azurebatch`. +We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. + +Note that the choice of VM size depends on your quota and the overall workload during the analysis. +For a thorough list, please refer the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). + ## Running in the background Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 2fc0a9b..27feb00 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -145,6 +145,61 @@ class NfcoreTemplate { output_tf.withWriter { w -> w << email_txt } } + // + // Construct and send adaptive card + // https://adaptivecards.io + // + public static void adaptivecard(workflow, params, summary_params, projectDir, log) { + def hook_url = params.hook_url + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = workflow.manifest.version + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + def hf = new File("$projectDir/assets/adaptivecard.json") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message 
= json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } + } + // // Print pipeline summary on completion // diff --git a/lib/Utils.groovy b/lib/Utils.groovy old mode 100755 new mode 100644 index 28567bd..8d030f4 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -21,19 +21,26 @@ class Utils { } // Check that all channels are present - def required_channels = ['conda-forge', 'bioconda', 'defaults'] - def conda_check_failed = !required_channels.every { ch -> ch in channels } + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean // Check that they are in the right order - conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) - conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } - if (conda_check_failed) { + if (channels_missing | channel_priority_violation) { log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " There is a problem with your Conda configuration!\n\n" + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + - " NB: The order of the channels matters!\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" } } diff --git a/lib/WorkflowEpitopeprediction.groovy b/lib/WorkflowEpitopeprediction.groovy index c209f5f..7008c4b 100755 --- a/lib/WorkflowEpitopeprediction.groovy +++ b/lib/WorkflowEpitopeprediction.groovy @@ -2,6 +2,8 @@ // This file holds several functions specific to the workflow/epitopeprediction.nf in the nf-core/epitopeprediction pipeline // +import groovy.text.SimpleTemplateEngine + class WorkflowEpitopeprediction { // @@ -42,6 +44,23 @@ class WorkflowEpitopeprediction { yaml_file_text += "data: |\n" yaml_file_text += "${summary_section}" return yaml_file_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = run_workflow.toMap() + meta["manifest_map"] = run_workflow.manifest.toMap() + + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "
<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>
  • " + + def methods_text = mqc_methods_yaml.text + + def engine = new SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html }// // Exit pipeline if incorrect --genome key provided // diff --git a/main.nf b/main.nf index 4b254aa..7397ae4 100644 --- a/main.nf +++ b/main.nf @@ -4,7 +4,8 @@ nf-core/epitopeprediction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/epitopeprediction -Website: https://nf-co.re/epitopeprediction + + Website: https://nf-co.re/epitopeprediction Slack : https://nfcore.slack.com/channels/epitopeprediction ---------------------------------------------------------------------------------------- */ diff --git a/modules.json b/modules.json index 75f3e7e..63f1952 100644 --- a/modules.json +++ b/modules.json @@ -2,20 +2,21 @@ "name": "nf-core/epitopeprediction", "homePage": "https://github.com/nf-core/epitopeprediction", "repos": { - "nf-core/modules": { - "git_url": "https://github.com/nf-core/modules.git", + "https://github.com/nf-core/modules.git": { "modules": { - "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", - "branch": "master" - }, - "fastqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", - "branch": "master" - }, - "multiqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", - "branch": "master" + "nf-core": { + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "fastqc": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "multiqc": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + } } } } diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf similarity index 79% rename from modules/nf-core/modules/custom/dumpsoftwareversions/main.nf rename to modules/nf-core/custom/dumpsoftwareversions/main.nf index 327d510..cebb6e0 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -1,11 +1,11 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_low' + label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? "bioconda::multiqc=1.11" : null) + conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml similarity index 100% rename from modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml rename to modules/nf-core/custom/dumpsoftwareversions/meta.yml diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py similarity index 100% rename from modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py rename to modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/fastqc/main.nf similarity index 85% rename from modules/nf-core/modules/fastqc/main.nf rename to modules/nf-core/fastqc/main.nf index ed6b8c5..0573036 100644 --- a/modules/nf-core/modules/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -44,4 +44,16 @@ process FASTQC { END_VERSIONS """ } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml similarity index 100% rename from modules/nf-core/modules/fastqc/meta.yml rename to modules/nf-core/fastqc/meta.yml diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf deleted file mode 100644 index 1264aac..0000000 --- a/modules/nf-core/modules/multiqc/main.nf +++ /dev/null @@ -1,31 +0,0 @@ -process MULTIQC { - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" - - input: - path multiqc_files - - output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - multiqc -f $args . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf new file mode 100644 index 0000000..a8159a5 --- /dev/null +++ b/modules/nf-core/multiqc/main.nf @@ -0,0 +1,53 @@ +process MULTIQC { + label 'process_single' + + conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + + input: + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + """ + multiqc \\ + --force \\ + $args \\ + $config \\ + $extra_config \\ + . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ + + stub: + """ + touch multiqc_data + touch multiqc_plots + touch multiqc_report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml similarity index 73% rename from modules/nf-core/modules/multiqc/meta.yml rename to modules/nf-core/multiqc/meta.yml index 6fa891e..ebc29b2 100644 --- a/modules/nf-core/modules/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -12,11 +12,25 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + input: - multiqc_files: type: file description: | List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. + pattern: "*.{yml,yaml}" + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + output: - report: type: file @@ -38,3 +52,4 @@ authors: - "@abhi18av" - "@bunop" - "@drpatelh" + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index 318b381..9da30bc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -21,7 +21,9 @@ params { // MultiQC options multiqc_config = null multiqc_title = null + multiqc_logo = null max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options outdir = null @@ -31,6 +33,7 @@ params { email_on_fail = null plaintext_email = false monochrome_logs = false + hook_url = null help = false validate_params = true show_hidden_params = false @@ -74,7 +77,6 @@ try { // } - profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { @@ -189,6 +191,7 @@ manifest { mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' version = '2.2.0dev' + doi = '' } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index 3ebab8c..80fb4c0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -213,12 +213,30 @@ "fa_icon": "fas fa-palette", "hidden": true }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. 
Currently, only MS Teams is supported.", + "hidden": true + }, "multiqc_config": { "type": "string", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true }, + "multiqc_logo": { + "type": "string", + "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", + "hidden": true + }, + "multiqc_methods_description": { + "type": "string", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, "tracedir": { "type": "string", "description": "Directory to keep pipeline Nextflow logs and reports.", diff --git a/workflows/epitopeprediction.nf b/workflows/epitopeprediction.nf index 09cea37..a9394aa 100644 --- a/workflows/epitopeprediction.nf +++ b/workflows/epitopeprediction.nf @@ -23,8 +23,10 @@ if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input sample ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty() +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -46,9 +48,9 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' // // MODULE: Installed directly from nf-core/modules // -include { FASTQC } from '../modules/nf-core/modules/fastqc/main' -include { MULTIQC } from '../modules/nf-core/modules/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -89,15 +91,20 @@ workflow EPITOPEPREDICTION { workflow_summary = WorkflowEpitopeprediction.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) + methods_description = WorkflowEpitopeprediction.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) + ch_methods_description = Channel.value(methods_description) + ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = 
ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) MULTIQC ( - ch_multiqc_files.collect() + ch_multiqc_files.collect(), + ch_multiqc_config.collect().ifEmpty([]), + ch_multiqc_custom_config.collect().ifEmpty([]), + ch_multiqc_logo.collect().ifEmpty([]) ) multiqc_report = MULTIQC.out.report.toList() ch_versions = ch_versions.mix(MULTIQC.out.versions) @@ -114,6 +121,9 @@ workflow.onComplete { NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) } NfcoreTemplate.summary(workflow, params, log) + if (params.hook_url) { + NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log) + } } /* From 70d29f2d0917f413fb9e181743536b102171866a Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Thu, 6 Oct 2022 17:15:53 +0200 Subject: [PATCH 27/78] formatting --- bin/epaa.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bin/epaa.py b/bin/epaa.py index 7bb8f13..79aa0f1 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -940,7 +940,7 @@ def is_created_by_variant(peptide): return True else: for start_pos in peptide.proteinPos[t]: - positions = list(range(start_pos, start_pos+len(peptide))) + positions = list(range(start_pos, start_pos + len(peptide))) if pos in positions: return True return False @@ -981,7 +981,6 @@ def make_predictions_from_variants( for peplen in range(minlength, maxlength): peptide_gen = generator.generate_peptides_from_proteins(prots, peplen) - peptides_var = [x for x in peptide_gen] peptides = [p for p in peptides_var if is_created_by_variant(p)] From 982e59b31deca6c3f99c5793dd604e771c942402 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Thu, 6 Oct 2022 17:32:45 +0200 Subject: [PATCH 28/78] update modules --- modules.json | 27 ++++++++++--------- .../custom/dumpsoftwareversions/main.nf | 0 .../custom/dumpsoftwareversions/meta.yml | 0 .../templates/dumpsoftwareversions.py | 0 modules/nf-core/{modules => }/gunzip/main.nf | 0 modules/nf-core/{modules => }/gunzip/meta.yml | 0 modules/nf-core/{modules => }/multiqc/main.nf | 0 .../nf-core/{modules => }/multiqc/meta.yml | 0 8 files changed, 14 insertions(+), 13 deletions(-) rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/main.nf (100%) rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/meta.yml (100%) rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py (100%) rename modules/nf-core/{modules => }/gunzip/main.nf (100%) rename modules/nf-core/{modules => }/gunzip/meta.yml (100%) rename modules/nf-core/{modules => }/multiqc/main.nf (100%) rename modules/nf-core/{modules => }/multiqc/meta.yml (100%) diff --git a/modules.json b/modules.json index 12be9cd..9c660e1 100644 --- a/modules.json +++ b/modules.json @@ -2,20 +2,21 @@ "name": "nf-core/epitopeprediction", "homePage": "https://github.com/nf-core/epitopeprediction", "repos": { - "nf-core/modules": { - "git_url": "https://github.com/nf-core/modules.git", + "https://github.com/nf-core/modules.git": { "modules": { - "custom/dumpsoftwareversions": { - "branch": "master", - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "gunzip": { - "branch": "master", - "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6" - }, - "multiqc": { - "branch": "master", - "git_sha": "5138acca0985ca01c38a1c4fba917d83772b1106" + "nf-core": { + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "8022c68e7403eecbd8ba9c49496f69f8c49d50f0" + }, + "gunzip": { + "branch": "master", + "git_sha": 
"5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "multiqc": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + } } } } diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf similarity index 100% rename from modules/nf-core/modules/custom/dumpsoftwareversions/main.nf rename to modules/nf-core/custom/dumpsoftwareversions/main.nf diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml similarity index 100% rename from modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml rename to modules/nf-core/custom/dumpsoftwareversions/meta.yml diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py similarity index 100% rename from modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py rename to modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py diff --git a/modules/nf-core/modules/gunzip/main.nf b/modules/nf-core/gunzip/main.nf similarity index 100% rename from modules/nf-core/modules/gunzip/main.nf rename to modules/nf-core/gunzip/main.nf diff --git a/modules/nf-core/modules/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml similarity index 100% rename from modules/nf-core/modules/gunzip/meta.yml rename to modules/nf-core/gunzip/meta.yml diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/multiqc/main.nf similarity index 100% rename from modules/nf-core/modules/multiqc/main.nf rename to modules/nf-core/multiqc/main.nf diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml similarity index 100% rename from modules/nf-core/modules/multiqc/meta.yml rename to modules/nf-core/multiqc/meta.yml From 9814017ee457b10dfa4de1b93078837a71881c38 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Fri, 7 Oct 2022 10:30:20 +0200 Subject: [PATCH 29/78] set pipeline logo param --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 786d1bb..97d9bb0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -52,7 +52,7 @@ params { // MultiQC options multiqc_config = null multiqc_title = null - multiqc_logo = null + multiqc_logo = "$projectDir/assets/nf-core-epitopeprediction_logo_light.png" max_multiqc_email_size = '25.MB' multiqc_methods_description = null From 90722cc809bb4c061f29d44c59a5c1fc87b4605a Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Fri, 7 Oct 2022 11:09:25 +0200 Subject: [PATCH 30/78] update CHANGELOG --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 35658fd..3ba13ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,13 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- [#180](https://github.com/nf-core/epitopeprediction/pull/180) - Add support for `VEP` annotated VCF files [#172](https://github.com/nf-core/epitopeprediction/issues/140) + ### `Changed` - [#177](https://github.com/nf-core/epitopeprediction/pull/177) - Update to nf-core template `2.5.1` - [#178](https://github.com/nf-core/epitopeprediction/pull/178) - Update MultiQC to `1.13` +- [#180](https://github.com/nf-core/epitopeprediction/pull/180) - Update to nf-core template `2.6` +- [#180](https://github.com/nf-core/epitopeprediction/pull/180) - 
Improve runtime for VCF-based predictions ### `Fixed` +- [#180](https://github.com/nf-core/epitopeprediction/pull/180) - Fix issue with `frameshift` determination + ## v2.1.0 - Nordring - 2022-08-02 ### `Added` From e3966d3a404ee70926cbb667839e0ee181b26b2c Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Tue, 11 Oct 2022 08:17:42 +0200 Subject: [PATCH 31/78] apply renaming suggestions --- bin/epaa.py | 80 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 35 deletions(-) diff --git a/bin/epaa.py b/bin/epaa.py index 79aa0f1..ff12346 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -312,10 +312,10 @@ def read_vcf(filename, pass_only=True): VEP_KEY = "CSQ" SNPEFF_KEY = "ANN" - vl = list() + variants = list() with open(filename, "rt") as tsvfile: vcf_reader = vcf.Reader(tsvfile) - vl = [r for r in vcf_reader] + variants = [r for r in vcf_reader] # list of mandatory (meta)data exclusion_list = ["ANN", "CSQ"] @@ -340,15 +340,15 @@ def read_vcf(filename, pass_only=True): list_vars = [] transcript_ids = [] - for num, record in enumerate(vl): + for num, record in enumerate(variants): chromosome = record.CHROM.strip("chr") genomic_position = record.POS variation_dbid = record.ID reference = str(record.REF) alternative_list = record.ALT - filter = record.FILTER + record_filter = record.FILTER - if pass_only and filter: + if pass_only and record_filter: continue """ @@ -591,61 +591,69 @@ def read_lig_ID_values(filename): def create_protein_column_value(pep): - all_proteins = [transcriptProteinMap[x.transcript_id.split(":")[0]] for x in set(pep.get_all_transcripts())] + all_proteins = [ + transcriptProteinMap[transcript.transcript_id.split(":")[0]] for transcript in set(pep.get_all_transcripts()) + ] return ",".join(set([item for sublist in all_proteins for item in sublist])) def create_transcript_column_value(pep): - return ",".join(set([x.transcript_id.split(":")[0] for x in set(pep.get_all_transcripts())])) + return ",".join(set([transcript.transcript_id.split(":")[0] for transcript in set(pep.get_all_transcripts())])) def create_mutationsyntax_column_value(pep, pep_dictionary): syntaxes = [] - for v in set(pep_dictionary[pep]): - for c in v.coding: - syntaxes.append(v.coding[c]) - return ",".join(set([y.aaMutationSyntax for y in syntaxes])) + for variant in set(pep_dictionary[pep]): + for coding in variant.coding: + syntaxes.append(variant.coding[coding]) + return ",".join(set([mutationSyntax.aaMutationSyntax for mutationSyntax in syntaxes])) def create_mutationsyntax_genome_column_value(pep, pep_dictionary): syntaxes = [] - for v in set(pep_dictionary[pep]): - for c in v.coding: - syntaxes.append(v.coding[c]) - return ",".join(set([y.cdsMutationSyntax for y in syntaxes])) + for variant in set(pep_dictionary[pep]): + for coding in variant.coding: + syntaxes.append(v.coding[coding]) + return ",".join(set([mutationSyntax.cdsMutationSyntax for mutationSyntax in syntaxes])) def create_gene_column_value(pep, pep_dictionary): - return ",".join(set([y.gene for y in set(pep_dictionary[pep])])) + return ",".join(set([variant.gene for variant in set(pep_dictionary[pep])])) def create_variant_pos_column_value(pep, pep_dictionary): - return ",".join(set(["{}".format(y.genomePos) for y in set(pep_dictionary[pep])])) + return ",".join(set(["{}".format(variant.genomePos) for variant in set(pep_dictionary[pep])])) def create_variant_chr_column_value(pep, pep_dictionary): - return ",".join(set(["{}".format(y.chrom) for y in set(pep_dictionary[pep])])) + return 
",".join(set(["{}".format(variant.chrom) for variant in set(pep_dictionary[pep])])) def create_variant_type_column_value(pep, pep_dictionary): types = {0: "SNP", 1: "DEL", 2: "INS", 3: "FSDEL", 4: "FSINS", 5: "UNKNOWN"} - return ",".join(set([types[y.type] for y in set(pep_dictionary[pep])])) + return ",".join(set([types[variant.type] for variant in set(pep_dictionary[pep])])) def create_variant_syn_column_value(pep, pep_dictionary): - return ",".join(set([str(y.isSynonymous) for y in set(pep_dictionary[pep])])) + return ",".join(set([str(variant.isSynonymous) for variant in set(pep_dictionary[pep])])) def create_variant_hom_column_value(pep, pep_dictionary): - return ",".join(set([str(y.isHomozygous) for y in set(pep_dictionary[pep])])) + return ",".join(set([str(variant.isHomozygous) for variant in set(pep_dictionary[pep])])) def create_coding_column_value(pep, pep_dictionary): - return ",".join(set([str(y.coding) for y in set(pep_dictionary[pep])])) + return ",".join(set([str(variant.coding) for variant in set(pep_dictionary[pep])])) def create_metadata_column_value(pep, c, pep_dictionary): - meta = set([str(y.get_metadata(c)[0]) for y in set(pep_dictionary[pep[0]]) if len(y.get_metadata(c)) != 0]) + meta = set( + [ + str(variant.get_metadata(c)[0]) + for variant in set(pep_dictionary[pep[0]]) + if len(variant.get_metadata(c)) != 0 + ] + ) if len(meta) is 0: return np.nan else: @@ -653,18 +661,18 @@ def create_metadata_column_value(pep, c, pep_dictionary): def create_wt_seq_column_value(pep, wtseqs): - transcripts = [x for x in set(pep["sequence"].get_all_transcripts())] - wt = set( + transcripts = [transcript for transcript in set(pep["sequence"].get_all_transcripts())] + wild_type = set( [ - str(wtseqs["{}_{}".format(str(pep["sequence"]), t.transcript_id)]) - for t in transcripts - if bool(t.vars) and "{}_{}".format(str(pep["sequence"]), t.transcript_id) in wtseqs + str(wtseqs["{}_{}".format(str(pep["sequence"]), transcript.transcript_id)]) + for transcript in transcripts + if bool(transcript.vars) and "{}_{}".format(str(pep["sequence"]), transcript.transcript_id) in wtseqs ] ) - if len(wt) is 0: + if len(wild_type) is 0: return np.nan else: - return ",".join(wt) + return ",".join(wild_type) def create_quant_column_value(row, dict): @@ -917,6 +925,7 @@ def generate_wt_seqs(peptides): return wt_dict +# TODO potential improvement in epytope def create_peptide_variant_dictionary(peptides): pep_to_variants = {} for pep in peptides: @@ -928,6 +937,7 @@ def create_peptide_variant_dictionary(peptides): return pep_to_variants +# TODO replace by epytope function once released def is_created_by_variant(peptide): transcript_ids = [x.transcript_id for x in set(peptide.get_all_transcripts())] for t in transcript_ids: @@ -1076,8 +1086,8 @@ def make_predictions_from_variants( df.columns = df.columns.str.replace("Score", "score") df.columns = df.columns.str.replace("Rank", "rank") - for c in set(metadata): - df[c] = df.apply(lambda row: create_metadata_column_value(row, c, pep_to_variants), axis=1) + for col in set(metadata): + df[col] = df.apply(lambda row: create_metadata_column_value(row, col, pep_to_variants), axis=1) pred_dataframes.append(df) @@ -1301,9 +1311,9 @@ def __main__(): peptides, metadata = read_peptide_input(args.peptides) else: if args.somatic_mutations.endswith(".GSvar") or args.somatic_mutations.endswith(".tsv"): - vl, transcripts, metadata = read_GSvar(args.somatic_mutations) + variant_list, transcripts, metadata = read_GSvar(args.somatic_mutations) elif 
args.somatic_mutations.endswith(".vcf"): - vl, transcripts, metadata = read_vcf(args.somatic_mutations) + variant_list, transcripts, metadata = read_vcf(args.somatic_mutations) transcripts = list(set(transcripts)) transcriptProteinMap, transcriptSwissProtMap = get_protein_ids_for_transcripts( @@ -1377,7 +1387,7 @@ def __main__(): ) else: pred_dataframes, statistics, all_peptides_filtered, proteins = make_predictions_from_variants( - vl, + variant_list, methods, thresholds, args.use_affinity_thresholds, From 7c1e2d0ca43fb3d90026f05a66cbab28436e60c9 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Tue, 11 Oct 2022 08:20:27 +0200 Subject: [PATCH 32/78] Update CHANGELOG.md Co-authored-by: Jonas Scheid <43858870+jonasscheid@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ba13ba..e247456 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- [#180](https://github.com/nf-core/epitopeprediction/pull/180) - Add support for `VEP` annotated VCF files [#172](https://github.com/nf-core/epitopeprediction/issues/140) +- [#180](https://github.com/nf-core/epitopeprediction/pull/180) - Add support for `VEP` annotated VCF files [#172](https://github.com/nf-core/epitopeprediction/issues/172) ### `Changed` From be77cccf98f7a1e52e26a34195e12730667c9668 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Tue, 11 Oct 2022 08:26:47 +0200 Subject: [PATCH 33/78] fix variable name --- bin/epaa.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bin/epaa.py b/bin/epaa.py index ff12346..e946232 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -255,9 +255,9 @@ def read_GSvar(filename, pass_only=True): transToVar = {} # fix because of memory/timing issues due to combinatorial explosion - for v in list_vars: - for trans_id in v.coding.keys(): - transToVar.setdefault(trans_id, []).append(v) + for variant in list_vars: + for trans_id in variant.coding.keys(): + transToVar.setdefault(trans_id, []).append(variant) for tId, vs in transToVar.items(): if len(vs) > 10: @@ -503,9 +503,9 @@ def read_vcf(filename, pass_only=True): # fix because of memory/timing issues due to combinatorial explosion - for v in list_vars: - for trans_id in v.coding.keys(): - transToVar.setdefault(trans_id, []).append(v) + for variant in list_vars: + for trans_id in variant.coding.keys(): + transToVar.setdefault(trans_id, []).append(variant) for tId, vs in transToVar.items(): if len(vs) > 10: @@ -613,7 +613,7 @@ def create_mutationsyntax_genome_column_value(pep, pep_dictionary): syntaxes = [] for variant in set(pep_dictionary[pep]): for coding in variant.coding: - syntaxes.append(v.coding[coding]) + syntaxes.append(variant.coding[coding]) return ",".join(set([mutationSyntax.cdsMutationSyntax for mutationSyntax in syntaxes])) From 6205347dddd40bc4b53e6f4231ea6875a5224f2f Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Thu, 1 Dec 2022 15:57:37 +0100 Subject: [PATCH 34/78] update GRCh38 biomart archive --- bin/epaa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/epaa.py b/bin/epaa.py index e946232..0bd28c2 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -1301,7 +1301,7 @@ def __main__(): metadata = [] proteins = [] - references = {"GRCh37": "http://feb2014.archive.ensembl.org", "GRCh38": "http://aug2017.archive.ensembl.org"} + references = {"GRCh37": "http://feb2014.archive.ensembl.org", "GRCh38": 
"http://apr2018.archive.ensembl.org"} global transcriptProteinMap global transcriptSwissProtMap From e15058cba7a89cf8d4632bb5df601fbac7517fd0 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Mon, 5 Dec 2022 08:44:03 +0100 Subject: [PATCH 35/78] fix external tool version --- assets/external_tools_meta.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/external_tools_meta.json b/assets/external_tools_meta.json index 70e7219..828f5d7 100644 --- a/assets/external_tools_meta.json +++ b/assets/external_tools_meta.json @@ -42,7 +42,7 @@ "binary_name": "netMHCpan" }, "4.1": { - "version": 4.0, + "version": 4.1, "software_md5": "5f6eab43feb80a24e32eb02b22e9db5f", "data_url": "https://services.healthtech.dtu.dk/services/NetMHCpan-4.1/data.tar.gz", "data_md5": "4bdd3944cb4c5b8ba4d8900dae074c85", From e0c94dbe630467c46ef21bb7349a6e02c6dd552a Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 8 Dec 2022 13:09:31 +0000 Subject: [PATCH 36/78] Template update for nf-core/tools version 2.7.1 --- .devcontainer/devcontainer.json | 27 +++++++++++++ .gitattributes | 1 + .github/CONTRIBUTING.md | 16 ++++++++ .github/ISSUE_TEMPLATE/bug_report.yml | 2 +- .github/workflows/ci.yml | 8 +++- .github/workflows/fix-linting.yml | 6 +-- .github/workflows/linting.yml | 18 +++++---- .github/workflows/linting_comment.yml | 2 +- .prettierignore | 2 + CITATION.cff | 56 --------------------------- README.md | 4 +- assets/slackreport.json | 34 ++++++++++++++++ docs/usage.md | 24 +++++++----- lib/NfcoreSchema.groovy | 1 - lib/NfcoreTemplate.groovy | 41 +++++++++++++++----- lib/WorkflowMain.groovy | 18 ++++++--- modules.json | 9 +++-- modules/local/samplesheet_check.nf | 4 ++ nextflow.config | 12 ++++-- nextflow_schema.json | 8 +++- workflows/epitopeprediction.nf | 11 +++--- 21 files changed, 193 insertions(+), 111 deletions(-) create mode 100644 .devcontainer/devcontainer.json delete mode 100644 CITATION.cff create mode 100644 assets/slackreport.json diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..ea27a58 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,27 @@ +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", + "python.formatting.yapfPath": "/opt/conda/bin/yapf", + "python.linting.flake8Path": "/opt/conda/bin/flake8", + "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", + "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", + "python.linting.pylintPath": "/opt/conda/bin/pylint" + }, + + // Add the IDs of extensions you want installed when the container is created. 
+ "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} diff --git a/.gitattributes b/.gitattributes index 050bb12..7a2dabc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ *.config linguist-language=nextflow +*.nf.test linguist-language=nextflow modules/nf-core/** linguist-generated subworkflows/nf-core/** linguist-generated diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index d6b4355..efea5c1 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -101,3 +101,19 @@ If you are using a new feature from core Nextflow, you may bump the minimum requ ### Images and figures For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. + +To get started: + +- Open the repo in [Codespaces](https://github.com/nf-core/epitopeprediction/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) +- [Dockerfile](.devcontainer/Dockerfile) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 31eaa8d..046680a 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,7 +42,7 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 21.10.3)_ + * Nextflow version _(eg. 22.10.1)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a4205db..8e0b61b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,6 +11,10 @@ on: env: NXF_ANSI_LOG: false +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true + jobs: test: name: Run pipeline with test data @@ -20,11 +24,11 @@ jobs: strategy: matrix: NXF_VER: - - "21.10.3" + - "22.10.1" - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index f166298..835130c 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -24,7 +24,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install Prettier run: npm install -g prettier @prettier/plugin-php @@ -34,9 +34,9 @@ jobs: id: prettier_status run: | if prettier --check ${GITHUB_WORKSPACE}; then - echo "::set-output name=result::pass" + echo "name=result::pass" >> $GITHUB_OUTPUT else - echo "::set-output name=result::fail" + echo "name=result::fail" >> $GITHUB_OUTPUT fi - name: Run 'prettier --write' diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8a5ce69..858d622 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -4,6 +4,8 @@ name: nf-core linting # that the code meets the nf-core guidelines. 
on: push: + branches: + - dev pull_request: release: types: [published] @@ -12,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -25,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install Prettier run: npm install -g prettier @@ -38,7 +40,7 @@ jobs: PythonBlack: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Check code lints with Black uses: psf/black@stable @@ -69,12 +71,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v4 with: python-version: "3.7" architecture: "x64" @@ -97,7 +99,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 04758f6..3963518 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -18,7 +18,7 @@ jobs: - name: Get PR number id: pr_number - run: echo "::set-output name=pr_number::$(cat linting-logs/PR_number.txt)" + run: echo "name=pr_number::$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment uses: marocchino/sticky-pull-request-comment@v2 diff --git a/.prettierignore b/.prettierignore index eb74a57..437d763 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,5 +1,6 @@ email_template.html adaptivecard.json +slackreport.json .nextflow* work/ data/ @@ -8,3 +9,4 @@ results/ testing/ testing* *.pyc +bin/ diff --git a/CITATION.cff b/CITATION.cff deleted file mode 100644 index 017666c..0000000 --- a/CITATION.cff +++ /dev/null @@ -1,56 +0,0 @@ -cff-version: 1.2.0 -message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication" -authors: - - family-names: Ewels - given-names: Philip - - family-names: Peltzer - given-names: Alexander - - family-names: Fillinger - given-names: Sven - - family-names: Patel - given-names: Harshil - - family-names: Alneberg - given-names: Johannes - - family-names: Wilm - given-names: Andreas - - family-names: Garcia - given-names: Maxime Ulysse - - family-names: Di Tommaso - given-names: Paolo - - family-names: Nahnsen - given-names: Sven -title: "The nf-core framework for community-curated bioinformatics pipelines." 
-version: 2.4.1 -doi: 10.1038/s41587-020-0439-x -date-released: 2022-05-16 -url: https://github.com/nf-core/tools -prefered-citation: - type: article - authors: - - family-names: Ewels - given-names: Philip - - family-names: Peltzer - given-names: Alexander - - family-names: Fillinger - given-names: Sven - - family-names: Patel - given-names: Harshil - - family-names: Alneberg - given-names: Johannes - - family-names: Wilm - given-names: Andreas - - family-names: Garcia - given-names: Maxime Ulysse - - family-names: Di Tommaso - given-names: Paolo - - family-names: Nahnsen - given-names: Sven - doi: 10.1038/s41587-020-0439-x - journal: nature biotechnology - start: 276 - end: 278 - title: "The nf-core framework for community-curated bioinformatics pipelines." - issue: 3 - volume: 38 - year: 2020 - url: https://dx.doi.org/10.1038/s41587-020-0439-x diff --git a/README.md b/README.md index 61a86fa..c329db3 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/epitopeprediction/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) @@ -31,7 +31,7 @@ On release, automated continuous integration tests run the pipeline on a full-si ## Quick Start -1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`) +1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.1`) 2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_. 
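With Nextflow and one of the container engines above in place, a minimal first run can use the bundled `test` profile. The sketch below assumes Docker and uses `<OUTDIR>` as a placeholder for any writable output directory:

```bash
# Smoke test on the bundled minimal dataset (assumes Docker; <OUTDIR> is a placeholder)
nextflow run nf-core/epitopeprediction -profile test,docker --outdir <OUTDIR>
```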
diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 0000000..043d02f --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "sanger-tol/readmapping v${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? ("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/docs/usage.md b/docs/usage.md index e861c4f..ad81edf 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -83,9 +83,9 @@ nextflow pull nf-core/epitopeprediction It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -First, go to the [nf-core/epitopeprediction releases page](https://github.com/nf-core/epitopeprediction/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. +First, go to the [nf-core/epitopeprediction releases page](https://github.com/nf-core/epitopeprediction/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. -This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. ## Core Nextflow arguments @@ -95,7 +95,7 @@ This version number will be logged in reports when you run the pipeline, so that Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. 
When using Biocontainers, most of these software packaging methods pull Docker containers from quay.io e.g [FastQC](https://quay.io/repository/biocontainers/fastqc) except for Singularity which directly downloads Singularity images via https hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/) and Conda which downloads and installs software locally from [Bioconda](https://bioconda.github.io/).
+Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below.

 > We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported.

@@ -104,8 +104,11 @@ The pipeline also dynamically loads configurations from [https://github.com/nf-c

 Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! They are loaded in sequence, so later profiles can overwrite earlier profiles.

-If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended.
+If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines depending on the computing environment.

+- `test`
+  - A profile with a complete configuration for automated testing
+  - Includes links to test data so needs no other parameters
 - `docker`
   - A generic configuration profile to be used with [Docker](https://docker.com/)
 - `singularity`
@@ -118,9 +121,6 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof
   - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/)
 - `conda`
   - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud.
-- `test`
-  - A profile with a complete configuration for automated testing
-  - Includes links to test data so needs no other parameters

 ### `-resume`

@@ -169,8 +169,14 @@ Work dir:
 Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run`
 ```

+#### For beginners
+
+As a first step to bypass this error, you can try to increase the amount of CPUs, memory, and time for the whole pipeline via the parameters `--max_cpus`, `--max_memory`, and `--max_time`. Based on the error above, you have to increase the amount of memory. To get the default value for `--max_memory`, go to the [parameter documentation of rnaseq](https://nf-co.re/rnaseq/3.9/parameters) and scroll down to the `show hidden parameter` button; in this case it is 128GB. You can then try to run your pipeline again with `--max_memory 200GB -resume` to skip all processes that were already completed. If you cannot increase the resources of the complete pipeline, you can try to adapt the resources for a single process as mentioned below.
+
+#### Advanced option on process level
+
 To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process.
The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). -We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/software/star/align/main.nf`. +We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/star/align/main.nf`. If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. @@ -189,7 +195,7 @@ process { > > If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. -### Updating containers +### Updating containers (advanced users) The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy index b3d092f..33cd4f6 100755 --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -46,7 +46,6 @@ class NfcoreSchema { 'quiet', 'syslog', 'v', - 'version', // Options for `nextflow run` command 'ansi', diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 27feb00..25a0a74 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -32,6 +32,25 @@ class NfcoreTemplate { } } + // + // Generate version string + // + public static String version(workflow) { + String version_string = "" + + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string + } + // // Construct and send completion email // @@ -61,7 +80,7 @@ class NfcoreTemplate { misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp def email_fields = [:] - email_fields['version'] = workflow.manifest.version + email_fields['version'] = NfcoreTemplate.version(workflow) email_fields['runName'] = workflow.runName email_fields['success'] = workflow.success email_fields['dateComplete'] = workflow.complete @@ -146,10 +165,10 @@ class NfcoreTemplate { } // - // Construct and send adaptive card - // https://adaptivecards.io + // Construct and send a notification to a web server as JSON + // e.g. Microsoft Teams and Slack // - public static void adaptivecard(workflow, params, summary_params, projectDir, log) { + public static void IM_notification(workflow, params, summary_params, projectDir, log) { def hook_url = params.hook_url def summary = [:] @@ -170,7 +189,7 @@ class NfcoreTemplate { misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp def msg_fields = [:] - msg_fields['version'] = workflow.manifest.version + msg_fields['version'] = NfcoreTemplate.version(workflow) msg_fields['runName'] = workflow.runName msg_fields['success'] = workflow.success msg_fields['dateComplete'] = workflow.complete @@ -178,13 +197,16 @@ class NfcoreTemplate { msg_fields['exitStatus'] = workflow.exitStatus msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") msg_fields['projectDir'] = workflow.projectDir msg_fields['summary'] = summary << misc_fields // Render the JSON template def engine = new groovy.text.GStringTemplateEngine() - def hf = new File("$projectDir/assets/adaptivecard.json") + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? 
"slackreport.json" : "adaptivecard.json" + def hf = new File("$projectDir/assets/${json_path}") def json_template = engine.createTemplate(hf).make(msg_fields) def json_message = json_template.toString() @@ -209,7 +231,7 @@ class NfcoreTemplate { if (workflow.stats.ignoredCount == 0) { log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" } } else { log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" @@ -297,6 +319,7 @@ class NfcoreTemplate { // public static String logo(workflow, monochrome_logs) { Map colors = logColours(monochrome_logs) + String workflow_version = NfcoreTemplate.version(workflow) String.format( """\n ${dashedLine(monochrome_logs)} @@ -305,7 +328,7 @@ class NfcoreTemplate { ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} ${dashedLine(monochrome_logs)} """.stripIndent() ) diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 86a4ecf..4c99016 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -19,7 +19,7 @@ class WorkflowMain { } // - // Print help to screen if required + // Generate help string // public static String help(workflow, params, log) { def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" @@ -32,7 +32,7 @@ class WorkflowMain { } // - // Print parameter summary log to screen + // Generate parameter summary log string // public static String paramsSummaryLog(workflow, params, log) { def summary_log = '' @@ -53,15 +53,21 @@ class WorkflowMain { System.exit(0) } - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) + // Print workflow version and exit on --version + if (params.version) { + String workflow_version = NfcoreTemplate.version(workflow) + log.info "${workflow.manifest.name} ${workflow_version}" + System.exit(0) } // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params, log) + // Validate workflow parameters via the JSON schema + if (params.validate_params) { + NfcoreSchema.validateParameters(workflow, params, log) + } + // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) diff --git a/modules.json b/modules.json index 63f1952..cd66c64 100644 --- a/modules.json +++ b/modules.json @@ -7,15 +7,18 @@ "nf-core": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "installed_by": ["modules"] }, "multiqc": { "branch": "master", - 
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "installed_by": ["modules"] } } } diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index 7df86c8..021eb32 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -1,5 +1,6 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" + label 'process_single' conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -13,6 +14,9 @@ process SAMPLESHEET_CHECK { path '*.csv' , emit: csv path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: // This script is bundled with the pipeline, in nf-core/epitopeprediction/bin/ """ check_samplesheet.py \\ diff --git a/nextflow.config b/nextflow.config index 9da30bc..becbf81 100644 --- a/nextflow.config +++ b/nextflow.config @@ -35,6 +35,7 @@ params { monochrome_logs = false hook_url = null help = false + version = false validate_params = true show_hidden_params = false schema_ignore_params = 'genomes' @@ -81,6 +82,7 @@ profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { params.enable_conda = true + conda.enabled = true docker.enabled = false singularity.enabled = false podman.enabled = false @@ -89,6 +91,7 @@ profiles { } mamba { params.enable_conda = true + conda.enabled = true conda.useMamba = true docker.enabled = false singularity.enabled = false @@ -104,6 +107,9 @@ profiles { shifter.enabled = false charliecloud.enabled = false } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + } singularity { singularity.enabled = true singularity.autoMounts = true @@ -185,11 +191,11 @@ dag { manifest { name = 'nf-core/epitopeprediction' - author = 'Christopher Mohr, Alexander Peltzer' + author = """Christopher Mohr, Alexander Peltzer""" homePage = 'https://github.com/nf-core/epitopeprediction' - description = 'A fully reproducible and state of the art epitope prediction pipeline.' + description = """A fully reproducible and state of the art epitope prediction pipeline.""" mainScript = 'main.nf' - nextflowVersion = '!>=21.10.3' + nextflowVersion = '!>=22.10.1' version = '2.2.0dev' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 80fb4c0..c30be2f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -176,6 +176,12 @@ "fa_icon": "fas fa-question-circle", "hidden": true }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, "publish_dir_mode": { "type": "string", "default": "copy", @@ -217,7 +223,7 @@ "type": "string", "description": "Incoming hook URL for messaging service", "fa_icon": "fas fa-people-group", - "help_text": "Incoming hook URL for messaging service. Currently, only MS Teams is supported.", + "help_text": "Incoming hook URL for messaging service. 
Currently, MS Teams and Slack are supported.", "hidden": true }, "multiqc_config": { diff --git a/workflows/epitopeprediction.nf b/workflows/epitopeprediction.nf index a9394aa..dc8d484 100644 --- a/workflows/epitopeprediction.nf +++ b/workflows/epitopeprediction.nf @@ -82,7 +82,7 @@ workflow EPITOPEPREDICTION { ch_versions = ch_versions.mix(FASTQC.out.versions.first()) CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') + ch_versions.unique{ it.text }.collectFile(name: 'collated_versions.yml') ) // @@ -102,12 +102,11 @@ workflow EPITOPEPREDICTION { MULTIQC ( ch_multiqc_files.collect(), - ch_multiqc_config.collect().ifEmpty([]), - ch_multiqc_custom_config.collect().ifEmpty([]), - ch_multiqc_logo.collect().ifEmpty([]) + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() ) multiqc_report = MULTIQC.out.report.toList() - ch_versions = ch_versions.mix(MULTIQC.out.versions) } /* @@ -122,7 +121,7 @@ workflow.onComplete { } NfcoreTemplate.summary(workflow, params, log) if (params.hook_url) { - NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log) + NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) } } From 50a3fa66599b3883eb85afb108303eeca9734308 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Mon, 12 Dec 2022 15:03:58 +0100 Subject: [PATCH 37/78] fix optional channels --- modules/local/epytope_peptide_prediction.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/epytope_peptide_prediction.nf b/modules/local/epytope_peptide_prediction.nf index fc536fd..2c5e022 100644 --- a/modules/local/epytope_peptide_prediction.nf +++ b/modules/local/epytope_peptide_prediction.nf @@ -12,8 +12,8 @@ process EPYTOPE_PEPTIDE_PREDICTION { output: tuple val(meta), path("*.json"), emit: json - tuple val(meta), path("*.tsv"), optional: true, emit: predicted - tuple val(meta), path("*.fasta"), optional: true, emit: fasta + tuple val(meta), path("*.tsv"), emit: predicted, optional: true + tuple val(meta), path("*.fasta"), emit: fasta, optional: true path "versions.yml", emit: versions script: From a36a3dd3c386618dfce83838efd9787f60457ccd Mon Sep 17 00:00:00 2001 From: ggabernet Date: Mon, 12 Dec 2022 16:04:35 +0100 Subject: [PATCH 38/78] test error messages --- bin/epaa.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/bin/epaa.py b/bin/epaa.py index 0bd28c2..fa99ff8 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -31,9 +31,15 @@ # instantiate global logger object logger = logging.getLogger(__name__) # turn off passing of messages to root logger -logger.propagate = False +# logger.propagate = False logger.setLevel(logging.DEBUG) +handler = logging.StreamHandler(sys.stdout) +handler.setLevel(logging.DEBUG) +formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +handler.setFormatter(formatter) +logger.addHandler(handler) + ID_SYSTEM_USED = EIdentifierTypes.ENSEMBL transcriptProteinMap = {} transcriptSwissProtMap = {} @@ -498,7 +504,7 @@ def read_vcf(filename, pass_only=True): list_vars.append(var) else: logger.error("No supported variant annotation string found. Aborting.") - sys.exit(1) + sys.exit(1, "No supported variant annotation string found. 
Aborting.") transToVar = {} # fix because of memory/timing issues due to combinatorial explosion @@ -1294,7 +1300,7 @@ def __main__(): if len(sys.argv) <= 1: parser.print_help() - sys.exit(1) + sys.exit(1, "Provide at least one argument.") logger.addHandler(logging.FileHandler("{}_prediction.log".format(args.identifier))) logger.info("Starting predictions at " + str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))) @@ -1308,9 +1314,11 @@ def __main__(): # read in variants or peptides if args.peptides: + logger.info("Running epaa for peptides...") peptides, metadata = read_peptide_input(args.peptides) else: if args.somatic_mutations.endswith(".GSvar") or args.somatic_mutations.endswith(".tsv"): + logger.info("Running epaa for variants...") variant_list, transcripts, metadata = read_GSvar(args.somatic_mutations) elif args.somatic_mutations.endswith(".vcf"): variant_list, transcripts, metadata = read_vcf(args.somatic_mutations) From 6e1bc5c90c3998ca57a428b37d1a1efa03a5a239 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Mon, 12 Dec 2022 16:12:19 +0100 Subject: [PATCH 39/78] also print error to stderr --- bin/check_samplesheet.py | 6 +++--- bin/epaa.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 439bb8e..419e7eb 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -159,7 +159,7 @@ def sniff_format(handle): sniffer = csv.Sniffer() if not sniffer.has_header(peek): logger.critical("The given sample sheet does not appear to contain a header.") - sys.exit(1) + sys.exit("The given sample sheet does not appear to contain a header.") dialect = sniffer.sniff(peek) return dialect @@ -199,7 +199,7 @@ def print_error(error, context="Line", context_str=""): error, context.strip(), context_str.strip() ) print(error_str) - sys.exit(1) + sys.exit() def check_allele_nomenclature(allele): @@ -325,7 +325,7 @@ def main(argv=None): logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") if not args.file_in.is_file(): logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) + sys.exit(f"The given input file {args.file_in} was not found!") args.file_out.parent.mkdir(parents=True, exist_ok=True) check_samplesheet(args.file_in, args.file_out) diff --git a/bin/epaa.py b/bin/epaa.py index fa99ff8..abab3d0 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -504,7 +504,7 @@ def read_vcf(filename, pass_only=True): list_vars.append(var) else: logger.error("No supported variant annotation string found. Aborting.") - sys.exit(1, "No supported variant annotation string found. Aborting.") + sys.exit("No supported variant annotation string found. 
Aborting.") transToVar = {} # fix because of memory/timing issues due to combinatorial explosion @@ -1300,7 +1300,7 @@ def __main__(): if len(sys.argv) <= 1: parser.print_help() - sys.exit(1, "Provide at least one argument.") + sys.exit("Provide at least one argument to epaa.py.") logger.addHandler(logging.FileHandler("{}_prediction.log".format(args.identifier))) logger.info("Starting predictions at " + str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))) From 2d1c9cbb3d6145b28c921733413cfe1c9d093ffd Mon Sep 17 00:00:00 2001 From: ggabernet Date: Mon, 12 Dec 2022 16:16:36 +0100 Subject: [PATCH 40/78] add version info to logger --- bin/epaa.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/epaa.py b/bin/epaa.py index abab3d0..5fd50a9 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -1303,6 +1303,7 @@ def __main__(): sys.exit("Provide at least one argument to epaa.py.") logger.addHandler(logging.FileHandler("{}_prediction.log".format(args.identifier))) + logger.info("Running Epitope Prediction And Annotation version: " + str(VERSION)) logger.info("Starting predictions at " + str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))) metadata = [] From e940cae73ea52d71c449747bb2ade073237c6f25 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Mon, 12 Dec 2022 16:22:01 +0100 Subject: [PATCH 41/78] better error --- bin/epaa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/epaa.py b/bin/epaa.py index 5fd50a9..326d9c9 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -504,7 +504,7 @@ def read_vcf(filename, pass_only=True): list_vars.append(var) else: logger.error("No supported variant annotation string found. Aborting.") - sys.exit("No supported variant annotation string found. Aborting.") + sys.exit("No supported variant annotation string found. 
Input VCFs require annotation with SNPEff or VEP prior to running the epitope prediction pipeline.") transToVar = {} # fix because of memory/timing issues due to combinatorial explosion From 7bc1c9fc96c1b94b3cc732dc82359cc01ae0586c Mon Sep 17 00:00:00 2001 From: ggabernet Date: Mon, 12 Dec 2022 16:29:01 +0100 Subject: [PATCH 42/78] set format also to file handler --- bin/epaa.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bin/epaa.py b/bin/epaa.py index 326d9c9..0f59ef4 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -1302,7 +1302,11 @@ def __main__(): parser.print_help() sys.exit("Provide at least one argument to epaa.py.") - logger.addHandler(logging.FileHandler("{}_prediction.log".format(args.identifier))) + filehandler = logging.FileHandler("{}_prediction.log".format(args.identifier)) + filehandler.setLevel(logging.DEBUG) + filehandler.setFormatter(formatter) + logger.addHandler(filehandler) + logger.info("Running Epitope Prediction And Annotation version: " + str(VERSION)) logger.info("Starting predictions at " + str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))) From 99b6ec60f3f350372ea40e9633bf89cf999d8f85 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Mon, 12 Dec 2022 16:33:22 +0100 Subject: [PATCH 43/78] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e247456..ae0f27d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#180](https://github.com/nf-core/epitopeprediction/pull/180) - Add support for `VEP` annotated VCF files [#172](https://github.com/nf-core/epitopeprediction/issues/172) +- [#186](https://github.com/nf-core/epitopeprediction/pull/180) - Logging of epaa.py script also to stdout and added sys.exit error messages. ### `Changed` From 1aba3022b4f10d38325c6cd976716951f726e2c6 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Mon, 12 Dec 2022 17:01:30 +0100 Subject: [PATCH 44/78] Update CHANGELOG.md Co-authored-by: Christopher Mohr --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae0f27d..9c4af8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#180](https://github.com/nf-core/epitopeprediction/pull/180) - Add support for `VEP` annotated VCF files [#172](https://github.com/nf-core/epitopeprediction/issues/172) -- [#186](https://github.com/nf-core/epitopeprediction/pull/180) - Logging of epaa.py script also to stdout and added sys.exit error messages. +- [#186](https://github.com/nf-core/epitopeprediction/pull/186) - Log messages from `epaa.py` script to stdout and provide `sys.exit` error messages. 
### `Changed` From 5324001b06b2952516eef282af485cb3d60eb179 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Mon, 12 Dec 2022 17:13:06 +0100 Subject: [PATCH 45/78] undo changes samplesheet --- bin/check_samplesheet.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 419e7eb..439bb8e 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -159,7 +159,7 @@ def sniff_format(handle): sniffer = csv.Sniffer() if not sniffer.has_header(peek): logger.critical("The given sample sheet does not appear to contain a header.") - sys.exit("The given sample sheet does not appear to contain a header.") + sys.exit(1) dialect = sniffer.sniff(peek) return dialect @@ -199,7 +199,7 @@ def print_error(error, context="Line", context_str=""): error, context.strip(), context_str.strip() ) print(error_str) - sys.exit() + sys.exit(1) def check_allele_nomenclature(allele): @@ -325,7 +325,7 @@ def main(argv=None): logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") if not args.file_in.is_file(): logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(f"The given input file {args.file_in} was not found!") + sys.exit(2) args.file_out.parent.mkdir(parents=True, exist_ok=True) check_samplesheet(args.file_in, args.file_out) From 91f7636014616cae016957f545c65dbadef5fa0a Mon Sep 17 00:00:00 2001 From: ggabernet Date: Tue, 13 Dec 2022 10:02:27 +0100 Subject: [PATCH 46/78] fix black linting --- bin/epaa.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/epaa.py b/bin/epaa.py index 0f59ef4..9767692 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -36,7 +36,7 @@ handler = logging.StreamHandler(sys.stdout) handler.setLevel(logging.DEBUG) -formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") handler.setFormatter(formatter) logger.addHandler(handler) @@ -504,7 +504,9 @@ def read_vcf(filename, pass_only=True): list_vars.append(var) else: logger.error("No supported variant annotation string found. Aborting.") - sys.exit("No supported variant annotation string found. Input VCFs require annotation with SNPEff or VEP prior to running the epitope prediction pipeline.") + sys.exit( + "No supported variant annotation string found. Input VCFs require annotation with SNPEff or VEP prior to running the epitope prediction pipeline." 
+ ) transToVar = {} # fix because of memory/timing issues due to combinatorial explosion From eb73a6c255b4ed23a48ae4eb2387908b59fe363a Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Tue, 13 Dec 2022 11:53:54 +0100 Subject: [PATCH 47/78] format modules.json --- modules.json | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/modules.json b/modules.json index 1a991fe..271a0d5 100644 --- a/modules.json +++ b/modules.json @@ -8,23 +8,17 @@ "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gunzip": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } } From 373de790f1b5c610b23ab2fd7630e99a317ed844 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Tue, 13 Dec 2022 12:03:11 +0100 Subject: [PATCH 48/78] add missing bracket --- workflows/epitopeprediction.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/workflows/epitopeprediction.nf b/workflows/epitopeprediction.nf index 0e833ab..f9e6e8e 100644 --- a/workflows/epitopeprediction.nf +++ b/workflows/epitopeprediction.nf @@ -460,6 +460,7 @@ workflow EPITOPEPREDICTION { ch_multiqc_logo.toList() ) multiqc_report = MULTIQC.out.report.toList() + } } /* From c494fd9c7672054c3c8616bda44abd052ce19722 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Tue, 13 Dec 2022 12:04:19 +0100 Subject: [PATCH 49/78] add to CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e247456..37afdba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#178](https://github.com/nf-core/epitopeprediction/pull/178) - Update MultiQC to `1.13` - [#180](https://github.com/nf-core/epitopeprediction/pull/180) - Update to nf-core template `2.6` - [#180](https://github.com/nf-core/epitopeprediction/pull/180) - Improve runtime for VCF-based predictions +- [#187](https://github.com/nf-core/epitopeprediction/pull/187) - Update to nf-core template `2.7.1` ### `Fixed` From 8c6248e634ae5e551e8b2565c282aae7698903a3 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Tue, 13 Dec 2022 15:20:33 +0100 Subject: [PATCH 50/78] Update workflows/epitopeprediction.nf Co-authored-by: Gisela Gabernet --- workflows/epitopeprediction.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/epitopeprediction.nf b/workflows/epitopeprediction.nf index f9e6e8e..cc65590 100644 --- a/workflows/epitopeprediction.nf +++ b/workflows/epitopeprediction.nf @@ -436,7 +436,7 @@ workflow EPITOPEPREDICTION { // MODULE: Pipeline reporting // CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique{ it.text }.collectFile(name: 'collated_versions.yml') + ch_versions.unique().collectFile(name: 'collated_versions.yml') ) // From e012a2a9e30f2eb9bf5d5e44ba1281a1fbf2896f Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Mon, 19 Dec 2022 12:05:08 +0000 Subject: [PATCH 51/78] Template update for nf-core/tools version 2.7.2 --- .github/workflows/fix-linting.yml | 4 +- .github/workflows/linting_comment.yml | 2 +- lib/WorkflowMain.groovy | 2 +- modules.json | 6 +- modules/local/samplesheet_check.nf | 2 +- .../custom/dumpsoftwareversions/main.nf | 2 +- 
.../templates/dumpsoftwareversions.py | 99 ++++++++++--------- modules/nf-core/fastqc/main.nf | 40 +++----- modules/nf-core/multiqc/main.nf | 2 +- nextflow.config | 3 - nextflow_schema.json | 6 -- workflows/epitopeprediction.nf | 2 +- 12 files changed, 82 insertions(+), 88 deletions(-) mode change 100644 => 100755 modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 835130c..9ebf496 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -34,9 +34,9 @@ jobs: id: prettier_status run: | if prettier --check ${GITHUB_WORKSPACE}; then - echo "name=result::pass" >> $GITHUB_OUTPUT + echo "result=pass" >> $GITHUB_OUTPUT else - echo "name=result::fail" >> $GITHUB_OUTPUT + echo "result=fail" >> $GITHUB_OUTPUT fi - name: Run 'prettier --write' diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 3963518..0bbcd30 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -18,7 +18,7 @@ jobs: - name: Get PR number id: pr_number - run: echo "name=pr_number::$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment uses: marocchino/sticky-pull-request-comment@v2 diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 4c99016..12066c8 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -72,7 +72,7 @@ class WorkflowMain { NfcoreTemplate.checkConfigProvided(workflow, log) // Check that conda channels are set-up correctly - if (params.enable_conda) { + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { Utils.checkCondaChannels(log) } diff --git a/modules.json b/modules.json index cd66c64..cfb4980 100644 --- a/modules.json +++ b/modules.json @@ -7,17 +7,17 @@ "nf-core": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["modules"] } } diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index 021eb32..5b2cba0 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -2,7 +2,7 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" label 'process_single' - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/python:3.8.3' : 'quay.io/biocontainers/python:3.8.3' }" diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index cebb6e0..3df2176 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,7 +2,7 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) + conda "bioconda::multiqc=1.13" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py old mode 100644 new mode 100755 index 787bdb7..e55b8d4 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -1,5 +1,9 @@ #!/usr/bin/env python + +"""Provide functions to merge multiple versions.yml files.""" + + import platform from textwrap import dedent @@ -7,6 +11,7 @@ def _make_versions_html(versions): + """Generate a tabular HTML output of all versions for MultiQC.""" html = [ dedent( """\\ @@ -45,47 +50,53 @@ def _make_versions_html(versions): return "\\n".join(html) -versions_this_module = {} -versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, -} - -with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - -# aggregate versions by the module name (derived from fully-qualified process name) -versions_by_module = {} -for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. 
" - ) - except KeyError: - versions_by_module[module] = process_versions - -versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", -} - -versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), -} - -with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) -with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - -with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) +def main(): + """Load all version files and generate merged output.""" + versions_this_module = {} + versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, + } + + with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + + # aggregate versions by the module name (derived from fully-qualified process name) + versions_by_module = {} + for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + + versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", + } + + versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), + } + + with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) + with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + + with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) + + +if __name__ == "__main__": + main() diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 0573036..9ae5838 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,7 +2,7 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) + conda "bioconda::fastqc=0.11.9" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : 'quay.io/biocontainers/fastqc:0.11.9--0' }" @@ -20,30 +20,22 @@ process FASTQC { script: def args = task.ext.args ?: '' - // Add soft-links to original FastQs for consistent naming in pipeline def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - [ ! 
-f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - fastqc $args --threads $task.cpus ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } + // Make list of old name and new name pairs to use for renaming in the bash while loop + def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def rename_to = old_new_pairs*.join(' ').join(' ') + def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + """ + printf "%s %s\\n" $rename_to | while read old_name new_name; do + [ -f "\${new_name}" ] || ln -s \$old_name \$new_name + done + fastqc $args --threads $task.cpus $renamed_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ stub: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index a8159a5..68f66be 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,7 +1,7 @@ process MULTIQC { label 'process_single' - conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) + conda "bioconda::multiqc=1.13" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" diff --git a/nextflow.config b/nextflow.config index becbf81..ae52867 100644 --- a/nextflow.config +++ b/nextflow.config @@ -39,7 +39,6 @@ params { validate_params = true show_hidden_params = false schema_ignore_params = 'genomes' - enable_conda = false // Config options @@ -81,7 +80,6 @@ try { profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { - params.enable_conda = true conda.enabled = true docker.enabled = false singularity.enabled = false @@ -90,7 +88,6 @@ profiles { charliecloud.enabled = false } mamba { - params.enable_conda = true conda.enabled = true conda.useMamba = true docker.enabled = false diff --git a/nextflow_schema.json b/nextflow_schema.json index c30be2f..763f421 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -263,12 +263,6 @@ "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." - }, - "enable_conda": { - "type": "boolean", - "description": "Run this workflow with Conda. 
You can also use '-profile conda' instead of providing this parameter.", - "hidden": true, - "fa_icon": "fas fa-bacon" } } } diff --git a/workflows/epitopeprediction.nf b/workflows/epitopeprediction.nf index dc8d484..0574281 100644 --- a/workflows/epitopeprediction.nf +++ b/workflows/epitopeprediction.nf @@ -82,7 +82,7 @@ workflow EPITOPEPREDICTION { ch_versions = ch_versions.mix(FASTQC.out.versions.first()) CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique{ it.text }.collectFile(name: 'collated_versions.yml') + ch_versions.unique().collectFile(name: 'collated_versions.yml') ) // From 0ba23ed3e553d058967abaea023400daee03f143 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Thu, 22 Dec 2022 14:03:58 +0100 Subject: [PATCH 52/78] update nextflow version for ci --- .github/workflows/ci-external.yml | 2 +- .github/workflows/ci.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-external.yml b/.github/workflows/ci-external.yml index 189daf0..d0e0503 100644 --- a/.github/workflows/ci-external.yml +++ b/.github/workflows/ci-external.yml @@ -17,7 +17,7 @@ jobs: matrix: include: # Test pipeline minimum Nextflow version - - NXF_VER: "21.10.3" + - NXF_VER: "22.10.1" NXF_EDGE: "" # Test latest edge release of Nextflow - NXF_VER: "" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8d4c67b..0da72f1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,7 +49,7 @@ jobs: matrix: include: # Test pipeline minimum Nextflow version - - NXF_VER: "21.10.3" + - NXF_VER: "22.10.1" NXF_EDGE: "" # Test latest edge release of Nextflow - NXF_VER: "" @@ -90,7 +90,7 @@ jobs: matrix: include: # Test pipeline minimum Nextflow version - - NXF_VER: "21.10.3" + - NXF_VER: "22.10.1" NXF_EDGE: "" # Test latest edge release of Nextflow - NXF_VER: "" From 7e2d27fd150dedcd604f7783684445552887d596 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Thu, 22 Dec 2022 14:53:12 +0100 Subject: [PATCH 53/78] remove enable_conda from local modules --- modules/local/cat_files.nf | 2 +- modules/local/csvtk_concat.nf | 2 +- modules/local/csvtk_split.nf | 2 +- modules/local/epytope_check_requested_models.nf | 2 +- modules/local/epytope_generate_peptides.nf | 2 +- modules/local/epytope_peptide_prediction.nf | 2 +- modules/local/epytope_show_supported_models.nf | 2 +- modules/local/snpsift_split.nf | 2 +- modules/local/split_peptides.nf | 2 +- modules/local/variant_split.nf | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/local/cat_files.nf b/modules/local/cat_files.nf index b002bfa..b993048 100644 --- a/modules/local/cat_files.nf +++ b/modules/local/cat_files.nf @@ -1,7 +1,7 @@ process CAT_FILES { label 'process_low' - conda (params.enable_conda ? "conda-forge:sed=4.8" : null) + conda "conda-forge:sed=4.8" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/cat:5.2.3--hdfd78af_1' : 'quay.io/biocontainers/cat:5.2.3--hdfd78af_1' }" diff --git a/modules/local/csvtk_concat.nf b/modules/local/csvtk_concat.nf index 2134337..651afb3 100644 --- a/modules/local/csvtk_concat.nf +++ b/modules/local/csvtk_concat.nf @@ -1,7 +1,7 @@ process CSVTK_CONCAT { label 'process_low' - conda (params.enable_conda ? "bioconda::csvtk=0.23.0" : null) + conda "bioconda::csvtk=0.23.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/csvtk:0.23.0--h9ee0642_0' : 'quay.io/biocontainers/csvtk:0.23.0--h9ee0642_0' }" diff --git a/modules/local/csvtk_split.nf b/modules/local/csvtk_split.nf index 0d118ee..cdb2db0 100644 --- a/modules/local/csvtk_split.nf +++ b/modules/local/csvtk_split.nf @@ -1,7 +1,7 @@ process CSVTK_SPLIT { label 'process_low' - conda (params.enable_conda ? "conda-forge::sed=4.7 bioconda::csvtk=0.23.0" : null) + conda "conda-forge::sed=4.7 bioconda::csvtk=0.23.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/csvtk:0.23.0--h9ee0642_0' : 'quay.io/biocontainers/csvtk:0.23.0--h9ee0642_0' }" diff --git a/modules/local/epytope_check_requested_models.nf b/modules/local/epytope_check_requested_models.nf index 5327cbe..7ae5ecd 100644 --- a/modules/local/epytope_check_requested_models.nf +++ b/modules/local/epytope_check_requested_models.nf @@ -1,7 +1,7 @@ process EPYTOPE_CHECK_REQUESTED_MODELS { label 'process_low' - conda (params.enable_conda ? "bioconda::epytope=3.1.0" : null) + conda "bioconda::epytope=3.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/epytope:3.1.0--pyh5e36f6f_0' : 'quay.io/biocontainers/epytope:3.1.0--pyh5e36f6f_0' }" diff --git a/modules/local/epytope_generate_peptides.nf b/modules/local/epytope_generate_peptides.nf index eaaa997..35acfb9 100644 --- a/modules/local/epytope_generate_peptides.nf +++ b/modules/local/epytope_generate_peptides.nf @@ -2,7 +2,7 @@ process EPYTOPE_GENERATE_PEPTIDES { label 'process_low' tag "${meta.sample}" - conda (params.enable_conda ? "bioconda::epytope=3.1.0" : null) + conda "bioconda::epytope=3.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/epytope:3.1.0--pyh5e36f6f_0' : 'quay.io/biocontainers/epytope:3.1.0--pyh5e36f6f_0' }" diff --git a/modules/local/epytope_peptide_prediction.nf b/modules/local/epytope_peptide_prediction.nf index 2c5e022..3b906e5 100644 --- a/modules/local/epytope_peptide_prediction.nf +++ b/modules/local/epytope_peptide_prediction.nf @@ -1,7 +1,7 @@ process EPYTOPE_PEPTIDE_PREDICTION { label 'process_low' - conda (params.enable_conda ? "conda-forge::coreutils=9.1 conda-forge::tcsh=6.20.00 bioconda::epytope=3.1.0 conda-forge::gawk=5.1.0 conda-forge::perl=5.32.1" : null) + conda "conda-forge::coreutils=9.1 conda-forge::tcsh=6.20.00 bioconda::epytope=3.1.0 conda-forge::gawk=5.1.0 conda-forge::perl=5.32.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-11bbf0d242ea96f7b9c08d5b5bc26f2cd5ac5943:3419f320edefe6077631798f50d7bd4f8dc4763f-0' : 'quay.io/biocontainers/mulled-v2-11bbf0d242ea96f7b9c08d5b5bc26f2cd5ac5943:3419f320edefe6077631798f50d7bd4f8dc4763f-0' }" diff --git a/modules/local/epytope_show_supported_models.nf b/modules/local/epytope_show_supported_models.nf index 157758c..433ee14 100644 --- a/modules/local/epytope_show_supported_models.nf +++ b/modules/local/epytope_show_supported_models.nf @@ -1,7 +1,7 @@ process EPYTOPE_SHOW_SUPPORTED_MODELS { label 'process_low' - conda (params.enable_conda ? "bioconda::epytope=3.1.0" : null) + conda "bioconda::epytope=3.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/epytope:3.1.0--pyh5e36f6f_0' : 'quay.io/biocontainers/epytope:3.1.0--pyh5e36f6f_0' }" diff --git a/modules/local/snpsift_split.nf b/modules/local/snpsift_split.nf index 8702345..a3376e7 100644 --- a/modules/local/snpsift_split.nf +++ b/modules/local/snpsift_split.nf @@ -1,7 +1,7 @@ process SNPSIFT_SPLIT { label 'process_low' - conda (params.enable_conda ? "bioconda::snpsift=4.2" : null) + conda "bioconda::snpsift=4.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/snpsift:4.2--hdfd78af_5' : 'quay.io/biocontainers/snpsift:4.2--hdfd78af_5' }" diff --git a/modules/local/split_peptides.nf b/modules/local/split_peptides.nf index b6e5aa1..744c932 100644 --- a/modules/local/split_peptides.nf +++ b/modules/local/split_peptides.nf @@ -1,7 +1,7 @@ process SPLIT_PEPTIDES { label 'process_low' - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3' : 'quay.io/biocontainers/python:3.8.3' }" diff --git a/modules/local/variant_split.nf b/modules/local/variant_split.nf index 6074865..9f2ca07 100644 --- a/modules/local/variant_split.nf +++ b/modules/local/variant_split.nf @@ -1,7 +1,7 @@ process VARIANT_SPLIT { label 'process_low' - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3' : 'quay.io/biocontainers/python:3.8.3' }" From 32e5345262308eef9e2db98e2315754e6870bf94 Mon Sep 17 00:00:00 2001 From: Christopher Mohr Date: Thu, 22 Dec 2022 17:08:32 +0100 Subject: [PATCH 54/78] remove remaining enable_conda, use conda.enabled in workflow --- modules/local/external_tools_import.nf | 2 +- modules/local/get_prediction_versions.nf | 2 +- modules/local/merge_json.nf | 2 +- nextflow.config | 2 ++ workflows/epitopeprediction.nf | 2 +- 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/modules/local/external_tools_import.nf b/modules/local/external_tools_import.nf index a764cbf..7262f73 100644 --- a/modules/local/external_tools_import.nf +++ b/modules/local/external_tools_import.nf @@ -4,7 +4,7 @@ process EXTERNAL_TOOLS_IMPORT { label 'process_low' - conda (params.enable_conda ? "conda-forge::coreutils=9.1" : null) + conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : 'biocontainers/biocontainers:v1.2.0_cv1' }" diff --git a/modules/local/get_prediction_versions.nf b/modules/local/get_prediction_versions.nf index 7cf76c2..fcc7cf9 100644 --- a/modules/local/get_prediction_versions.nf +++ b/modules/local/get_prediction_versions.nf @@ -1,7 +1,7 @@ process GET_PREDICTION_VERSIONS { label 'process_low' - conda (params.enable_conda ? "bioconda::epytope=3.1.0" : null) + conda "bioconda::epytope=3.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/epytope:3.1.0--pyh5e36f6f_0' : 'quay.io/biocontainers/epytope:3.1.0--pyh5e36f6f_0' }" diff --git a/modules/local/merge_json.nf b/modules/local/merge_json.nf index 0eca9cc..0031ffa 100644 --- a/modules/local/merge_json.nf +++ b/modules/local/merge_json.nf @@ -1,7 +1,7 @@ process MERGE_JSON { label 'process_low' - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3' : 'quay.io/biocontainers/python:3.8.3' }" diff --git a/nextflow.config b/nextflow.config index 123934f..ffda809 100644 --- a/nextflow.config +++ b/nextflow.config @@ -80,6 +80,8 @@ params { config_profile_url = null config_profile_name = null + conda.enabled = false + // Max resource options // Defaults only, expecting to be overwritten max_memory = '128.GB' diff --git a/workflows/epitopeprediction.nf b/workflows/epitopeprediction.nf index cc65590..d228988 100644 --- a/workflows/epitopeprediction.nf +++ b/workflows/epitopeprediction.nf @@ -150,7 +150,7 @@ workflow EPITOPEPREDICTION { if (tools.isEmpty()) { exit 1, "No valid tools specified." } - if (params.enable_conda && params.tools.contains("netmhc")) { + if (params.conda.enabled && params.tools.contains("netmhc")) { log.warn("Please note: if you want to use external prediction tools with conda it might be necessary to set --netmhc_system to darwin depending on your system.") } From 417e92053013a9b1e2506c41fafc490e341b3265 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Mon, 27 Feb 2023 15:12:59 +0100 Subject: [PATCH 55/78] fix black linting --- bin/check_samplesheet.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 439bb8e..a8a9284 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -217,7 +217,6 @@ def make_dir(path): def check_samplesheet(file_in, file_out): - """ sample,alleles,mhc_class,filename GBM_1,A*01:01;A*02:01;B*07:02;B*24:02;C*03:01;C*04:01,I,gbm_1_anno.vcf|gbm_1_peps.tsv|gbm_1_prot.fasta @@ -244,7 +243,6 @@ def check_samplesheet(file_in, file_out): sample_run_dict = {} with open(file_in, "r") as fin: - ## Check header COL_NUM = 4 HEADER = ["sample", "alleles", "mhc_class", "filename"] From 103be9d0d09d855c4c33c316562ed2fe079df64f Mon Sep 17 00:00:00 2001 From: ggabernet Date: Mon, 27 Feb 2023 15:14:59 +0100 Subject: [PATCH 56/78] bump versions --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index ffda809..b5c9094 100644 --- a/nextflow.config +++ b/nextflow.config @@ -238,7 +238,7 @@ manifest { description = """A fully reproducible and state of the art epitope prediction pipeline.""" mainScript = 'main.nf' nextflowVersion = '!>=22.10.1' - version = '2.2.0dev' + version = '2.2.0' doi = '' } From d19b0fc77b372a50219fc890b86e0d98355acf91 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Tue, 28 Feb 2023 16:50:30 +0100 Subject: [PATCH 57/78] Update README.md Co-authored-by: FriederikeHanssen --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6564544..44af412 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,8 @@ [![GitHub Actions CI Status](https://github.com/nf-core/epitopeprediction/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/epitopeprediction/actions?query=workflow%3A%22nf-core+CI%22) 
[![GitHub Actions Linting Status](https://github.com/nf-core/epitopeprediction/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/epitopeprediction/actions?query=workflow%3A%22nf-core+linting%22) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/epitopeprediction/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3564666-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3564666) +[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/epitopeprediction/results) +[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3564666-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3564666) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) From 16e83a7758d70d617f7235451c8e16aeac228f15 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Tue, 28 Feb 2023 17:30:08 +0100 Subject: [PATCH 58/78] add cat versions --- modules/local/cat_files.nf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/local/cat_files.nf b/modules/local/cat_files.nf index b993048..c79116f 100644 --- a/modules/local/cat_files.nf +++ b/modules/local/cat_files.nf @@ -11,6 +11,7 @@ process CAT_FILES { output: tuple val(meta), path("*_prediction*"), emit: output + path "versions.yml", emit: versions script: def fileExt = input[0].name.tokenize("\\.")[-1] @@ -20,5 +21,10 @@ process CAT_FILES { """ cat $input > ${prefix}_${type}.${fileExt} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS """ } From 93df79b17838e86ef596bbaec9a8ba1b804cdb59 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Tue, 28 Feb 2023 17:30:15 +0100 Subject: [PATCH 59/78] add external tool versions --- modules/local/external_tools_import.nf | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/local/external_tools_import.nf b/modules/local/external_tools_import.nf index 7262f73..0b4f2db 100644 --- a/modules/local/external_tools_import.nf +++ b/modules/local/external_tools_import.nf @@ -14,7 +14,7 @@ process EXTERNAL_TOOLS_IMPORT { output: path "${toolname}", emit: nonfree_tools - val "v_*.txt", emit: versions + path "versions.yml", emit: versions script: """ @@ -75,7 +75,9 @@ process EXTERNAL_TOOLS_IMPORT { # # CREATE VERSION FILE # - echo "${toolname} ${toolversion}" > "v_${toolname}.txt" - + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ${toolname}: ${toolversion} + END_VERSIONS """ } From db9ec039602c312935f3959f3e33f66297f7f200 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Tue, 28 Feb 2023 17:30:27 +0100 Subject: [PATCH 60/78] add yml tool versions --- modules/local/get_prediction_versions.nf | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/modules/local/get_prediction_versions.nf b/modules/local/get_prediction_versions.nf index fcc7cf9..bc5e681 100644 --- a/modules/local/get_prediction_versions.nf +++ b/modules/local/get_prediction_versions.nf @@ -10,16 +10,17 @@ process GET_PREDICTION_VERSIONS { val external_tool_versions output: - path "versions.csv", emit: versions + path "versions.yml", emit: versions script: def external_tools = external_tool_versions.join(",") """ - cat 
<<-END_VERSIONS > versions.csv - mhcflurry: \$(mhcflurry-predict --version 2>&1 | sed 's/^mhcflurry //; s/ .*\$//') - mhcnuggets: \$(python -c "import pkg_resources; print('mhcnuggets' + pkg_resources.get_distribution('mhcnuggets').version)" | sed 's/^mhcnuggets//; s/ .*\$//' ) - epytope: \$(python -c "import pkg_resources; print('epytope' + pkg_resources.get_distribution('epytope').version)" | sed 's/^epytope//; s/ .*\$//') + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mhcflurry: \$(mhcflurry-predict --version 2>&1 | sed 's/^mhcflurry //; s/ .*\$//') + mhcnuggets: \$(python -c "import pkg_resources; print('mhcnuggets' + pkg_resources.get_distribution('mhcnuggets').version)" | sed 's/^mhcnuggets//; s/ .*\$//' ) + epytope: \$(python -c "import pkg_resources; print('epytope' + pkg_resources.get_distribution('epytope').version)" | sed 's/^epytope//; s/ .*\$//') END_VERSIONS IFS=',' read -r -a external_tools <<< \"$external_tools\" From 615042f6de952b5ad5534f2dd9aed6a666426c1b Mon Sep 17 00:00:00 2001 From: ggabernet Date: Tue, 28 Feb 2023 17:39:51 +0100 Subject: [PATCH 61/78] add licenses to scripts --- bin/check_requested_models.py | 2 ++ bin/check_supported_models.py | 2 ++ bin/epaa.py | 3 +-- bin/gen_peptides.py | 2 ++ bin/merge_jsons.py | 2 ++ bin/split_peptides.py | 2 ++ bin/split_vcf_by_variants.py | 1 + 7 files changed, 12 insertions(+), 2 deletions(-) diff --git a/bin/check_requested_models.py b/bin/check_requested_models.py index d95cc69..2a509e0 100755 --- a/bin/check_requested_models.py +++ b/bin/check_requested_models.py @@ -1,4 +1,6 @@ #!/usr/bin/env python +# Written by Sabrina Krakau, Christopher Mohr and released under the MIT license. + import sys import csv diff --git a/bin/check_supported_models.py b/bin/check_supported_models.py index 3fcb015..403359d 100755 --- a/bin/check_supported_models.py +++ b/bin/check_supported_models.py @@ -1,4 +1,6 @@ #!/usr/bin/env python +# Written by Sabrina Krakau, Christopher Mohr and released under the MIT license. + import sys import csv diff --git a/bin/epaa.py b/bin/epaa.py index 9767692..422f2b8 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# Written by Christopher Mohr and released under the MIT license. import os import sys @@ -30,8 +31,6 @@ # instantiate global logger object logger = logging.getLogger(__name__) -# turn off passing of messages to root logger -# logger.propagate = False logger.setLevel(logging.DEBUG) handler = logging.StreamHandler(sys.stdout) diff --git a/bin/gen_peptides.py b/bin/gen_peptides.py index 70ecddb..dbfdde2 100755 --- a/bin/gen_peptides.py +++ b/bin/gen_peptides.py @@ -1,4 +1,6 @@ + #!/usr/bin/env python +# Written by Sabrina Krakau, Christopher Mohr and released under the MIT license. import sys import argparse diff --git a/bin/merge_jsons.py b/bin/merge_jsons.py index 222307e..aa469e3 100755 --- a/bin/merge_jsons.py +++ b/bin/merge_jsons.py @@ -1,4 +1,6 @@ #!/usr/bin/env python +# Written by Sabrina Krakau, Christopher Mohr, Gisela Gabernet and released under the MIT license. + import os import sys diff --git a/bin/split_peptides.py b/bin/split_peptides.py index f8a0e31..2ac4e88 100755 --- a/bin/split_peptides.py +++ b/bin/split_peptides.py @@ -1,4 +1,6 @@ #!/usr/bin/env python +# Written by Sabrina Krakau, Christopher Mohr and released under the MIT license. 
+ import math import argparse diff --git a/bin/split_vcf_by_variants.py b/bin/split_vcf_by_variants.py index 212a7d4..de73c5f 100755 --- a/bin/split_vcf_by_variants.py +++ b/bin/split_vcf_by_variants.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# Written by Christopher Mohr and released under the MIT license. import argparse import logging From e682dd264f67cf306ddb5efd00e7c8feca99eeac Mon Sep 17 00:00:00 2001 From: ggabernet Date: Tue, 28 Feb 2023 17:42:16 +0100 Subject: [PATCH 62/78] update changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea17c3c..a3a496e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.2.0dev - [name] - [date] +## v2.2.0dev - WaldhausenOst - 2023-03-01 ### `Added` @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - [#180](https://github.com/nf-core/epitopeprediction/pull/180) - Fix issue with `frameshift` determination +- [#194](https://github.com/nf-core/epitopeprediction/pull/194) - Fix software versions collection and add script licenses ## v2.1.0 - Nordring - 2022-08-02 From 53c65bdf9fdbdcf1e08ef03c482bbdeaa3bc6e93 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Tue, 28 Feb 2023 20:54:37 +0100 Subject: [PATCH 63/78] fix black --- bin/gen_peptides.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/gen_peptides.py b/bin/gen_peptides.py index dbfdde2..a52aa14 100755 --- a/bin/gen_peptides.py +++ b/bin/gen_peptides.py @@ -1,4 +1,3 @@ - #!/usr/bin/env python # Written by Sabrina Krakau, Christopher Mohr and released under the MIT license. From 7362c050d0229b6a12c29f53694a082e08942543 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Tue, 28 Feb 2023 20:56:25 +0100 Subject: [PATCH 64/78] fix get versions --- modules/local/external_tools_import.nf | 1 - modules/local/get_prediction_versions.nf | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/local/external_tools_import.nf b/modules/local/external_tools_import.nf index 0b4f2db..560f557 100644 --- a/modules/local/external_tools_import.nf +++ b/modules/local/external_tools_import.nf @@ -76,7 +76,6 @@ process EXTERNAL_TOOLS_IMPORT { # CREATE VERSION FILE # cat <<-END_VERSIONS > versions.yml - "${task.process}": ${toolname}: ${toolversion} END_VERSIONS """ diff --git a/modules/local/get_prediction_versions.nf b/modules/local/get_prediction_versions.nf index bc5e681..2dae3b2 100644 --- a/modules/local/get_prediction_versions.nf +++ b/modules/local/get_prediction_versions.nf @@ -26,7 +26,7 @@ process GET_PREDICTION_VERSIONS { IFS=',' read -r -a external_tools <<< \"$external_tools\" if ! 
[ -z "${external_tool_versions}" ]; then for TOOL in "\${external_tools[@]}"; do - echo "\$TOOL" >> versions.csv + echo "\$TOOL" >> versions.yml done fi """ From 5b03bc907cbf591aa90896ba44c23848803ad20a Mon Sep 17 00:00:00 2001 From: ggabernet Date: Wed, 1 Mar 2023 10:01:49 +0100 Subject: [PATCH 65/78] collect all versions --- workflows/epitopeprediction.nf | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/workflows/epitopeprediction.nf b/workflows/epitopeprediction.nf index d228988..5d83f77 100644 --- a/workflows/epitopeprediction.nf +++ b/workflows/epitopeprediction.nf @@ -129,7 +129,7 @@ workflow EPITOPEPREDICTION { GUNZIP_VCF ( ch_samples_from_sheet.variant_compressed ) - ch_versions = ch_versions.mix(GUNZIP_VCF.out.versions) + ch_versions = ch_versions.mix(GUNZIP_VCF.out.versions.ifEmpty(null)) ch_variants_uncompressed = GUNZIP_VCF.out.gunzip .mix(ch_samples_from_sheet.variant_uncompressed) @@ -165,8 +165,8 @@ workflow EPITOPEPREDICTION { .collect() // get versions of all prediction tools - GET_PREDICTION_VERSIONS(ch_external_versions.ifEmpty([])) - ch_prediction_tool_versions = GET_PREDICTION_VERSIONS.out.versions.ifEmpty("") + GET_PREDICTION_VERSIONS(ch_external_versions.ifEmpty(null)) + ch_prediction_tool_versions = GET_PREDICTION_VERSIONS.out.versions.ifEmpty(null) // TODO I guess it would be better to have two subworkflows for the if else parts (CM) if (params.show_supported_models) { @@ -177,6 +177,7 @@ workflow EPITOPEPREDICTION { .combine(ch_prediction_tool_versions) .first() ) + ch_versions = ch_versions.mix(SHOW_SUPPORTED_MODELS.out.versions.ifEmpty(null)) } else { @@ -192,13 +193,14 @@ workflow EPITOPEPREDICTION { ch_samples_uncompressed.variant, ch_prediction_tool_versions ) + ch_versions = ch_versions.mix(EPYTOPE_CHECK_REQUESTED_MODELS.out.versions.ifEmpty(null)) // perform the check requested models on the protein files EPYTOPE_CHECK_REQUESTED_MODELS_PROTEIN( ch_samples_uncompressed.protein, ch_prediction_tool_versions ) - + ch_versions = ch_versions.mix(EPYTOPE_CHECK_REQUESTED_MODELS_PROTEIN.out.versions.ifEmpty(null)) // perform the check requested models on the peptide file where we need the input itself to determine the given peptide lengths EPYTOPE_CHECK_REQUESTED_MODELS_PEP( ch_samples_uncompressed @@ -206,6 +208,7 @@ workflow EPITOPEPREDICTION { .map { meta_data, input_file -> tuple( meta_data, input_file ) }, ch_prediction_tool_versions ) + ch_versions = ch_versions.mix(EPYTOPE_CHECK_REQUESTED_MODELS_PEP.out.versions.ifEmpty(null)) // Return a warning if this is raised EPYTOPE_CHECK_REQUESTED_MODELS @@ -314,13 +317,13 @@ workflow EPITOPEPREDICTION { EPYTOPE_GENERATE_PEPTIDES( ch_samples_uncompressed.protein ) + ch_versions = ch_versions.mix(EPYTOPE_GENERATE_PEPTIDES.out.versions.ifEmpty(null)) + SPLIT_PEPTIDES_PROTEIN( EPYTOPE_GENERATE_PEPTIDES.out.splitted ) - - ch_versions = ch_versions.mix( EPYTOPE_GENERATE_PEPTIDES.out.versions.ifEmpty(null) ) - ch_versions = ch_versions.mix( SPLIT_PEPTIDES_PROTEIN.out.versions.ifEmpty(null) ) + ch_versions = ch_versions.mix(SPLIT_PEPTIDES_PROTEIN.out.versions.ifEmpty(null)) // split peptide data // TODO: Add the appropriate container to remove the warning @@ -345,6 +348,8 @@ workflow EPITOPEPREDICTION { .transpose(), EXTERNAL_TOOLS_IMPORT.out.nonfree_tools.collect().ifEmpty([]) ) + ch_versions = ch_versions.mix( EPYTOPE_PEPTIDE_PREDICTION_PROTEIN.out.versions.ifEmpty(null) ) + // Run epitope prediction for peptides EPYTOPE_PEPTIDE_PREDICTION_PEP( @@ -355,6 +360,8 @@ workflow 
EPITOPEPREDICTION { .transpose(), EXTERNAL_TOOLS_IMPORT.out.nonfree_tools.collect().ifEmpty([]) ) + ch_versions = ch_versions.mix( EPYTOPE_PEPTIDE_PREDICTION_PEP.out.versions.ifEmpty(null) ) + // Run epitope prediction for variants EPYTOPE_PEPTIDE_PREDICTION_VAR( @@ -366,11 +373,7 @@ workflow EPITOPEPREDICTION { .transpose(), EXTERNAL_TOOLS_IMPORT.out.nonfree_tools.collect().ifEmpty([]) ) - - // collect prediction script versions ch_versions = ch_versions.mix( EPYTOPE_PEPTIDE_PREDICTION_VAR.out.versions.ifEmpty(null) ) - ch_versions = ch_versions.mix( EPYTOPE_PEPTIDE_PREDICTION_PEP.out.versions.ifEmpty(null) ) - ch_versions = ch_versions.mix( EPYTOPE_PEPTIDE_PREDICTION_PROTEIN.out.versions.ifEmpty(null) ) // Combine the predicted files and save them in a branch to make a distinction between samples with single and multi files EPYTOPE_PEPTIDE_PREDICTION_PEP @@ -392,6 +395,8 @@ workflow EPITOPEPREDICTION { CAT_TSV( ch_predicted_peptides.single ) + ch_versions = ch_versions.mix( CAT_TSV.out.versions.ifEmpty(null) ) + CSVTK_CONCAT( ch_predicted_peptides.multi ) @@ -405,6 +410,7 @@ workflow EPITOPEPREDICTION { .mix( EPYTOPE_PEPTIDE_PREDICTION_VAR.out.fasta, EPYTOPE_PEPTIDE_PREDICTION_PROTEIN.out.fasta ) .groupTuple() ) + ch_versions = ch_versions.mix( CAT_FASTA.out.versions.ifEmpty(null) ) EPYTOPE_PEPTIDE_PREDICTION_PEP .out @@ -426,10 +432,11 @@ workflow EPITOPEPREDICTION { MERGE_JSON_SINGLE( ch_json_reports.single ) + ch_versions = ch_versions.mix( MERGE_JSON_SINGLE.out.versions.ifEmpty(null) ) + MERGE_JSON_MULTI( ch_json_reports.multi ) - ch_versions = ch_versions.mix( MERGE_JSON_SINGLE.out.versions.ifEmpty(null) ) ch_versions = ch_versions.mix( MERGE_JSON_MULTI.out.versions.ifEmpty(null) ) // From 27a029f297ffe9fd6b939de89543b60329d3e7a7 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Wed, 1 Mar 2023 10:08:28 +0100 Subject: [PATCH 66/78] fix empty prediction versions --- workflows/epitopeprediction.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/epitopeprediction.nf b/workflows/epitopeprediction.nf index 5d83f77..2356c1b 100644 --- a/workflows/epitopeprediction.nf +++ b/workflows/epitopeprediction.nf @@ -165,7 +165,7 @@ workflow EPITOPEPREDICTION { .collect() // get versions of all prediction tools - GET_PREDICTION_VERSIONS(ch_external_versions.ifEmpty(null)) + GET_PREDICTION_VERSIONS(ch_external_versions.ifEmpty("")) ch_prediction_tool_versions = GET_PREDICTION_VERSIONS.out.versions.ifEmpty(null) // TODO I guess it would be better to have two subworkflows for the if else parts (CM) From f239a7a2e8bb81eb635bcb9db4aaeae012372cb6 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Wed, 1 Mar 2023 10:43:53 +0100 Subject: [PATCH 67/78] fix split peptides warning --- conf/modules.config | 2 +- workflows/epitopeprediction.nf | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index bba5fea..8886348 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -62,7 +62,7 @@ process { ext.args = '' } - withName: SPLIT_PEPTIDES { + withName: SPLIT_PEPTIDES_PEPTIDES { ext.args = "--min_size ${params.peptides_split_minchunksize} --max_chunks ${params.peptides_split_maxchunks} " } diff --git a/workflows/epitopeprediction.nf b/workflows/epitopeprediction.nf index 2356c1b..463ef96 100644 --- a/workflows/epitopeprediction.nf +++ b/workflows/epitopeprediction.nf @@ -50,7 +50,7 @@ include { SNPSIFT_SPLIT} include { CSVTK_SPLIT} from '../modules/local/csvtk_split' include { EPYTOPE_GENERATE_PEPTIDES } from 
'../modules/local/epytope_generate_peptides' -include { SPLIT_PEPTIDES } from '../modules/local/split_peptides' +include { SPLIT_PEPTIDES as SPLIT_PEPTIDES_PEPTIDES } from '../modules/local/split_peptides' include { SPLIT_PEPTIDES as SPLIT_PEPTIDES_PROTEIN } from '../modules/local/split_peptides' include { EPYTOPE_PEPTIDE_PREDICTION as EPYTOPE_PEPTIDE_PREDICTION_PROTEIN } from '../modules/local/epytope_peptide_prediction' @@ -326,11 +326,10 @@ workflow EPITOPEPREDICTION { ch_versions = ch_versions.mix(SPLIT_PEPTIDES_PROTEIN.out.versions.ifEmpty(null)) // split peptide data - // TODO: Add the appropriate container to remove the warning - SPLIT_PEPTIDES( + SPLIT_PEPTIDES_PEPTIDES( ch_samples_uncompressed.peptide ) - ch_versions = ch_versions.mix( SPLIT_PEPTIDES.out.versions.ifEmpty(null) ) + ch_versions = ch_versions.mix( SPLIT_PEPTIDES_PEPTIDES.out.versions.ifEmpty(null) ) /* ======================================================================================== @@ -353,7 +352,7 @@ workflow EPITOPEPREDICTION { // Run epitope prediction for peptides EPYTOPE_PEPTIDE_PREDICTION_PEP( - SPLIT_PEPTIDES + SPLIT_PEPTIDES_PEPTIDES .out .splitted .combine( ch_prediction_tool_versions ) From becc335cb18bc9e924b5fc612af3a395525f60b9 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Wed, 1 Mar 2023 10:44:02 +0100 Subject: [PATCH 68/78] fix cat version --- modules/local/cat_files.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/cat_files.nf b/modules/local/cat_files.nf index c79116f..dabe8e8 100644 --- a/modules/local/cat_files.nf +++ b/modules/local/cat_files.nf @@ -24,7 +24,7 @@ process CAT_FILES { cat <<-END_VERSIONS > versions.yml "${task.process}": - cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*BusyBox //; s/ .*\$//') END_VERSIONS """ } From 73aabc6ba179c869e05a2dfc673d9f7a58608a4e Mon Sep 17 00:00:00 2001 From: ggabernet Date: Wed, 1 Mar 2023 17:06:39 +0100 Subject: [PATCH 69/78] fred2 to epytope --- bin/check_supported_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/check_supported_models.py b/bin/check_supported_models.py index 403359d..8edc5ba 100755 --- a/bin/check_supported_models.py +++ b/bin/check_supported_models.py @@ -6,7 +6,7 @@ import csv import argparse -from Fred2.EpitopePrediction import EpitopePredictorFactory +from epytope.EpitopePrediction import EpitopePredictorFactory def convert_allele_back(allele): From 3c0bc64d3535759e067e9872711e362fadae18e1 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Wed, 1 Mar 2023 19:48:51 +0100 Subject: [PATCH 70/78] Update CHANGELOG.md Co-authored-by: Christopher Mohr --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a3a496e..d18b4e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## v2.2.0dev - WaldhausenOst - 2023-03-01 +## v2.2.0dev - WaldhaeuserOst - 2023-03-01 ### `Added` From e0f09bfb9a3f2c1c005bbf765531877ac0fbb031 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Wed, 1 Mar 2023 19:49:57 +0100 Subject: [PATCH 71/78] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a3a496e..ed5bcd6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.2.0dev - WaldhausenOst - 2023-03-01 +## v2.2.0 - WaldhausenOst - 2023-03-01 ### `Added` From b7093b78191f57655f36adfbf8fde23a73f2e9ed Mon Sep 17 00:00:00 2001 From: ggabernet Date: Wed, 1 Mar 2023 20:01:41 +0100 Subject: [PATCH 72/78] update changelog --- CHANGELOG.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e800765..a9adb0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,8 +5,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v2.2.0 - WaldhaeuserOst - 2023-03-01 -3c0bc64d3535759e067e9872711e362fadae18e1 - ### `Added` - [#180](https://github.com/nf-core/epitopeprediction/pull/180) - Add support for `VEP` annotated VCF files [#172](https://github.com/nf-core/epitopeprediction/issues/172) From 70c99278b67671530edbf1789fbd8ed4124f1a62 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Fri, 3 Mar 2023 09:49:09 +0100 Subject: [PATCH 73/78] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9adb0a..c36c2b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.2.0 - WaldhaeuserOst - 2023-03-01 +## v2.2.0 - WaldhaeuserOst - 2023-03-03 ### `Added` From f8bf2917618f7f9caca219362fb0d01063472fba Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Fri, 3 Mar 2023 10:35:27 +0100 Subject: [PATCH 74/78] Update bin/epaa.py --- bin/epaa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/epaa.py b/bin/epaa.py index 422f2b8..3f66b82 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Written by Christopher Mohr and released under the MIT license. +# Written by Christopher Mohr and released under the MIT license (2022). import os import sys From f7fd61b0faa7b205be34fe16561fbcf44065ff68 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Fri, 3 Mar 2023 10:35:38 +0100 Subject: [PATCH 75/78] Update bin/split_vcf_by_variants.py --- bin/split_vcf_by_variants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/split_vcf_by_variants.py b/bin/split_vcf_by_variants.py index de73c5f..5c389f1 100755 --- a/bin/split_vcf_by_variants.py +++ b/bin/split_vcf_by_variants.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Written by Christopher Mohr and released under the MIT license. +# Written by Christopher Mohr and released under the MIT license (2022). 
import argparse import logging From baac306cc909d38508d7af54157e4776849ca195 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Fri, 3 Mar 2023 10:35:46 +0100 Subject: [PATCH 76/78] Update bin/merge_jsons.py --- bin/merge_jsons.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/merge_jsons.py b/bin/merge_jsons.py index aa469e3..75330ce 100755 --- a/bin/merge_jsons.py +++ b/bin/merge_jsons.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Written by Sabrina Krakau, Christopher Mohr, Gisela Gabernet and released under the MIT license. +# Written by Sabrina Krakau, Christopher Mohr, Gisela Gabernet and released under the MIT license (2022). import os From c695dda7d95bff025c623757aaa7cb9d4c79983c Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Fri, 3 Mar 2023 10:35:53 +0100 Subject: [PATCH 77/78] Update bin/gen_peptides.py --- bin/gen_peptides.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/gen_peptides.py b/bin/gen_peptides.py index a52aa14..50613a0 100755 --- a/bin/gen_peptides.py +++ b/bin/gen_peptides.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Written by Sabrina Krakau, Christopher Mohr and released under the MIT license. +# Written by Sabrina Krakau, Christopher Mohr and released under the MIT license (2022). import sys import argparse From 30ebe24c36ddf4f5eebdcb40eeeea14407c5adc6 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Fri, 3 Mar 2023 10:36:00 +0100 Subject: [PATCH 78/78] Update bin/split_peptides.py --- bin/split_peptides.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/split_peptides.py b/bin/split_peptides.py index 2ac4e88..5c96dad 100755 --- a/bin/split_peptides.py +++ b/bin/split_peptides.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Written by Sabrina Krakau, Christopher Mohr and released under the MIT license. +# Written by Sabrina Krakau, Christopher Mohr and released under the MIT license (2022). import math
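
The thread running through patches 65-67 above is the nf-core version-collection idiom: every module invocation is followed immediately by `ch_versions = ch_versions.mix( MODULE.out.versions )`, so each process's emitted `versions.yml` lands in a single channel instead of being gathered ad hoc at the end of the workflow. Below is a minimal, self-contained sketch of that idiom — the process name `EXAMPLE_TOOL`, its hard-coded version string, and the closing `collectFile` step are illustrative assumptions, not pipeline code; the pipeline itself mixes in many modules and hands the collected channel to the template's software-versions reporting module rather than printing it.

```nextflow
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2

// Hypothetical stand-in for any pipeline module: it does its work and also
// emits a versions.yml, following the module convention visible in
// modules/local/cat_files.nf (patch 68).
process EXAMPLE_TOOL {
    input:
    val sample

    output:
    val  sample         , emit: result
    path 'versions.yml' , emit: versions

    script:
    """
    echo "processing ${sample}"
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        example_tool: 1.0.0
    END_VERSIONS
    """
}

workflow {
    ch_versions = Channel.empty()

    EXAMPLE_TOOL( Channel.of('sample_1', 'sample_2') )
    // Mix this module's versions into the collection channel directly after
    // the call -- the pattern patches 65-67 apply to every module invocation.
    ch_versions = ch_versions.mix( EXAMPLE_TOOL.out.versions.first() )

    // For the sketch, just collate and print; the real pipeline instead
    // passes the collected version files to a dedicated reporting module.
    ch_versions
        .unique()
        .collectFile( name: 'collated_versions.yml' )
        .view { it.text }
}
```

Patch 66's one-line follow-up fits the same picture: feeding `GET_PREDICTION_VERSIONS` with `ch_external_versions.ifEmpty("")` rather than `ifEmpty(null)` gives a channel that may legitimately be empty a harmless placeholder value instead of the `null` that, per the commit message, broke the prediction-version collection.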