From 20e7d64b799728b2cad3ec74a11764cb13ea6585 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Wed, 22 Nov 2023 16:10:55 +0100 Subject: [PATCH 1/5] chore: update krakenuniq/preloadedkrakenuniq module --- modules.json | 2 +- .../preloadedkrakenuniq/environment.yml | 7 + .../krakenuniq/preloadedkrakenuniq/main.nf | 87 +++++---- .../krakenuniq/preloadedkrakenuniq/meta.yml | 29 +-- .../preloadedkrakenuniq/tests/main.nf.test | 77 ++++++++ .../tests/main.nf.test.snap | 172 ++++++++++++++++++ .../preloadedkrakenuniq/tests/tags.yml | 2 + 7 files changed, 324 insertions(+), 52 deletions(-) create mode 100644 modules/nf-core/krakenuniq/preloadedkrakenuniq/environment.yml create mode 100644 modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test create mode 100644 modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap create mode 100644 modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/tags.yml diff --git a/modules.json b/modules.json index aa63d31d..d220eecd 100644 --- a/modules.json +++ b/modules.json @@ -143,7 +143,7 @@ }, "krakenuniq/preloadedkrakenuniq": { "branch": "master", - "git_sha": "451df5258e0c03cb4e835bb02c22a9a31b44eb0d", + "git_sha": "8bbaa881ab9e59f3e18680550d65d52339640630", "installed_by": ["modules"] }, "krona/ktimporttaxonomy": { diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/environment.yml b/modules/nf-core/krakenuniq/preloadedkrakenuniq/environment.yml new file mode 100644 index 00000000..11bbb879 --- /dev/null +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/environment.yml @@ -0,0 +1,7 @@ +name: krakenuniq_preloadedkrakenuniq +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::krakenuniq=1.0.4 diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf index 7083d3f0..59055bdb 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf @@ 
-2,7 +2,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { tag "$meta.id" label 'process_high' - conda "bioconda::krakenuniq=1.0.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/krakenuniq:1.0.4--pl5321h19e8d03_0': 'biocontainers/krakenuniq:1.0.4--pl5321h19e8d03_0' }" @@ -11,17 +11,16 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { tuple val(meta), path(fastqs) path db val ram_chunk_size - val save_output_fastqs + val save_output_reads val report_file val save_output output: - tuple val(meta), path('*.classified{.,_}*') , optional:true, emit: classified_reads_fastq - tuple val(meta), path('*.unclassified{.,_}*') , optional:true, emit: unclassified_reads_fastq - tuple val(meta), path('*classified.txt') , optional:true, emit: classified_assignment - tuple val(meta), path('*report.txt') , emit: report - - path "versions.yml" , emit: versions + tuple val(meta), path('*.classified.fasta.gz') , optional:true, emit: classified_reads_fasta + tuple val(meta), path('*.unclassified.fasta.gz') , optional:true, emit: unclassified_reads_fasta + tuple val(meta), path('*.krakenuniq.classified.txt'), optional:true, emit: classified_assignment + tuple val(meta), path('*.krakenuniq.report.txt') , emit: report + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -30,21 +29,21 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { def args = task.ext.args ?: '' def args2 = task.ext.args ?: '' - def classified = meta.single_end ? '"\${PREFIX}.classified.fastq"' : '"\${PREFIX}.classified#.fastq"' - def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fastq"' : '"\${PREFIX}.unclassified#.fastq"' - def classified_option = save_output_fastqs ? "--classified-out ${classified}" : '' - def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : '' + def classified = meta.single_end ? 
'"\${PREFIX}.classified.fasta"' : '"\${PREFIX}.merged.classified.fasta"' + def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fasta"' : '"\${PREFIX}.merged.unclassified.fasta"' + def classified_option = save_output_reads ? "--classified-out ${classified}" : '' + def unclassified_option = save_output_reads ? "--unclassified-out ${unclassified}" : '' def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : '' def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : '' - def compress_reads_command = save_output_fastqs ? 'gzip --no-name *.fastq' : '' + def compress_reads_command = save_output_reads ? 'gzip --no-name *.fasta' : '' if (meta.single_end) { """ krakenuniq \\ + $args \\ --db $db \\ --preload \\ --preload-size $ram_chunk_size \\ - --threads $task.cpus \\ - $args + --threads $task.cpus strip_suffix() { local result=\$1 @@ -62,7 +61,6 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { $output_option \\ $unclassified_option \\ $classified_option \\ - $output_option \\ $args2 \\ "\${FASTQ}" done @@ -77,11 +75,11 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { } else { """ krakenuniq \\ + $args \\ --db $db \\ --preload \\ --preload-size $ram_chunk_size \\ - --threads $task.cpus \\ - $args + --threads $task.cpus strip_suffix() { local result @@ -102,7 +100,6 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { $output_option \\ $unclassified_option \\ $classified_option \\ - $output_option \\ --paired \\ $args2 \\ "\${FASTQ[@]}" @@ -121,21 +118,21 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { def args = task.ext.args ?: '' def args2 = task.ext.args ?: '' - def classified = meta.single_end ? '"\${PREFIX}.classified.fastq"' : '"\${PREFIX}.classified#.fastq"' - def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fastq"' : '"\${PREFIX}.unclassified#.fastq"' - def classified_option = save_output_fastqs ? "--classified-out ${classified}" : '' - def unclassified_option = save_output_fastqs ? 
"--unclassified-out ${unclassified}" : '' + def classified = meta.single_end ? '"\${PREFIX}.classified.fasta"' : '"\${PREFIX}.merged.classified.fasta"' + def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fasta"' : '"\${PREFIX}.merged.unclassified.fasta"' + def classified_option = save_output_reads ? "--classified-out ${classified}" : '' + def unclassified_option = save_output_reads ? "--unclassified-out ${unclassified}" : '' def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : '' def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : '' - def compress_reads_command = save_output_fastqs ? 'gzip --no-name *.fastq' : '' + def compress_reads_command = save_output_reads ? 'gzip --no-name *.fasta' : '' if (meta.single_end) { """ echo krakenuniq \\ + $args \\ --db $db \\ --preload \\ --preload-size $ram_chunk_size \\ - --threads $task.cpus \\ - $args + --threads $task.cpus strip_suffix() { local result=\$1 @@ -143,6 +140,14 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { echo "\${result%%.*}" } + create_file() { + echo '<3 nf-core' > "\$1" + } + + create_gzip_file() { + echo '<3 nf-core' | gzip -n > "\$1" + } + printf "%s\\n" ${fastqs} | while read FASTQ; do \\ echo "\${FASTQ}" PREFIX="\$(strip_suffix "\${FASTQ}")" @@ -155,14 +160,13 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { $output_option \\ $unclassified_option \\ $classified_option \\ - $output_option \\ $args2 \\ "\${FASTQ}" - touch "\${PREFIX}.classified.fastq.gz" - touch "\${PREFIX}.krakenuniq.classified.txt" - touch "\${PREFIX}.krakenuniq.report.txt" - touch "\${PREFIX}.unclassified.fastq.gz" + create_file "\${PREFIX}.krakenuniq.classified.txt" + create_file "\${PREFIX}.krakenuniq.report.txt" + create_gzip_file "\${PREFIX}.classified.fasta.gz" + create_gzip_file "\${PREFIX}.unclassified.fasta.gz" done echo $compress_reads_command @@ -175,11 +179,11 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { } else { """ echo krakenuniq \\ + $args \\ --db $db \\ 
--preload \\ --preload-size $ram_chunk_size \\ - --threads $task.cpus \\ - $args + --threads $task.cpus strip_suffix() { local result @@ -189,6 +193,14 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { echo "\${result%.}" } + create_file() { + echo '<3 nf-core' > "\$1" + } + + create_gzip_file() { + echo '<3 nf-core' | gzip -n > "\$1" + } + printf "%s %s\\n" ${fastqs} | while read FASTQ; do \\ read -r -a FASTQ <<< "\${FASTQ}" echo "\${FASTQ[@]}" @@ -202,15 +214,14 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { $output_option \\ $unclassified_option \\ $classified_option \\ - $output_option \\ --paired \\ $args2 \\ "\${FASTQ[@]}" - touch "\${PREFIX}.classified_1.fastq.gz" "\${PREFIX}.classified_2.fastq.gz" - touch "\${PREFIX}.krakenuniq.classified.txt" - touch "\${PREFIX}.krakenuniq.report.txt" - touch "\${PREFIX}.unclassified_1.fastq.gz" "\${PREFIX}.unclassified_2.fastq.gz" + create_file "\${PREFIX}.krakenuniq.classified.txt" + create_file "\${PREFIX}.krakenuniq.report.txt" + create_gzip_file "\${PREFIX}.merged.classified.fasta.gz" + create_gzip_file "\${PREFIX}.merged.unclassified.fasta.gz" done echo $compress_reads_command diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml b/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml index 41620c7b..4a6dffee 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml @@ -13,7 +13,6 @@ tools: documentation: https://github.com/fbreitwieser/krakenuniq doi: 10.1186/s13059-018-1568-0 licence: ["MIT"] - input: - meta: type: map @@ -30,11 +29,11 @@ input: type: string description: Amount of maximum amount of RAM each chunk of database that should be loaded at any one time pattern: "*GB" - - save_output_fastqs: + - save_output_reads: type: boolean description: | - If true, optional commands are added to save classified and unclassified reads - as fastq files + Optionally commands are added to save classified and unclassified reads as FASTA files. 
+ When the input is paired-end, the single output FASTA contains merged reads. - save_reads_assignment: type: boolean description: | @@ -46,29 +45,30 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - classified_reads_fastq: + - classified_reads_fasta: type: file description: | Reads classified as belonging to any of the taxa - on the KrakenUniq database. - pattern: "*.fastq.gz" - - unclassified_reads_fastq: + in the KrakenUniq reference database. + pattern: "*.classified.fasta.gz" + - unclassified_reads_fasta: type: file description: | Reads not classified to any of the taxa - on the KrakenUniq database. - pattern: "*.fastq.gz" + in the KrakenUniq reference database. + pattern: "*.unclassified.fasta.gz" - classified_assignment: type: file description: | KrakenUniq output file indicating the taxonomic assignment of each input read ## DOUBLE CHECK!! + pattern: "*.krakenuniq.classified.txt" - report: type: file description: | - KrakenUniq report containing stats about classified - and not classifed reads. - pattern: "*.report.txt" + KrakenUniq report containing statistics about classified + and unclassified reads. 
+ pattern: "*.krakenuniq.report.txt" - versions: type: file description: File containing software versions @@ -76,3 +76,6 @@ output: authors: - "@mjamy" - "@Midnighter" +maintainers: + - "@mjamy" + - "@Midnighter" diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test new file mode 100644 index 00000000..a7c44707 --- /dev/null +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test @@ -0,0 +1,77 @@ +nextflow_process { + + name "Test Process KRAKENUNIQ_PRELOADEDKRAKENUNIQ" + script "../main.nf" + process "KRAKENUNIQ_PRELOADEDKRAKENUNIQ" + tag "modules" + tag "modules_nfcore" + tag "krakenuniq" + tag "krakenuniq/preloadedkrakenuniq" + + test("sarscov2 - Illumina FASTQ single - stub-run") { + options "-stub-run" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id:'test', single_end:true], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = [] + input[2] = '8GB' + input[3] = true + input[4] = true + input[5] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("sarscov2 - Illumina FASTQ paired-end - stub-run") { + options "-stub-run" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id:'test', single_end:false], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = [] + input[2] = '8GB' + input[3] = true + input[4] = true + input[5] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } +} diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap 
b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap new file mode 100644 index 00000000..970865bd --- /dev/null +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap @@ -0,0 +1,172 @@ +{ + "sarscov2 - Illumina FASTQ paired-end - stub-run": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.classified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.unclassified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.krakenuniq.report.txt:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + "4": [ + "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" + ], + "classified_assignment": [ + [ + { + "id": "test", + "single_end": false + }, + "test.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + "classified_reads_fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.classified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" + ] + ], + "report": [ + [ + { + "id": "test", + "single_end": false + }, + "test.krakenuniq.report.txt:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + "unclassified_reads_fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.unclassified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" + ] + ], + "versions": [ + "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" + ] + } + ], + "timestamp": "2023-11-21T15:38:47.810576872" + }, + "sarscov2 - Illumina FASTQ single - stub-run": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1.classified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test_1.unclassified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1.krakenuniq.report.txt:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + "4": [ + "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" + ], + "classified_assignment": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + "classified_reads_fasta": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1.classified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" + ] + ], + "report": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1.krakenuniq.report.txt:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + "unclassified_reads_fasta": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1.unclassified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" + ] + ], + "versions": [ + "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" + ] + } + ], + "timestamp": "2023-11-21T15:38:42.894597091" + } +} \ No newline at end of file diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/tags.yml b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/tags.yml new file mode 100644 index 00000000..35ffde4d --- /dev/null +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/tags.yml @@ -0,0 +1,2 @@ +krakenuniq/preloadedkrakenuniq: + - modules/nf-core/krakenuniq/preloadedkrakenuniq/** From cc3bdea17989604a78a0b84d4274189786495f1e Mon Sep 17 00:00:00 2001 From: "Moritz E. 
Beber" Date: Wed, 22 Nov 2023 16:36:32 +0100 Subject: [PATCH 2/5] docs: describe changes in krakenuniq output --- docs/output.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/output.md b/docs/output.md index 11b07fe5..cf4678c3 100644 --- a/docs/output.md +++ b/docs/output.md @@ -375,23 +375,23 @@ You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply ### KrakenUniq -[KrakenUniq](https://github.com/fbreitwieser/krakenuniq) (formerly KrakenHLL) is an extenson to the fast k-mer-based classification [Kraken](https://github.com/DerrickWood/kraken) with an efficient algorithm for additionally assessing the coverage of unique k-mers found in each species in a dataset. +[KrakenUniq](https://github.com/fbreitwieser/krakenuniq) (formerly KrakenHLL) is an extension to the fast k-mer-based classification performed by [Kraken](https://github.com/DerrickWood/kraken) with an efficient algorithm for additionally assessing the coverage of unique k-mers found in each species in a dataset.
Output files - `krakenuniq/` - `/` - - `_.classified.fastq.gz`: FASTQ file containing all reads that had a hit against a reference in the database for a given sample - - `_.unclassified.fastq.gz`: FASTQ file containing all reads that did not have a hit in the database for a given sample - - `_.report.txt`: A Kraken2-style report that summarises the fraction abundance, taxonomic ID, number of Kmers, taxonomic path of all the hits, with an additional column for k-mer coverage, that allows for more accurate distinguishing between false-positive/true-postitive hits - - `_.classifiedreads.txt`: A list of read IDs and the hits each read had against each database for a given sample + - `_[.merged].classified.fasta.gz`: Optional FASTA file containing all reads that had a hit against a reference in the database for a given sample. Paired-end input reads are merged in this output. + - `_[.merged].unclassified.fasta.gz`: Optional FASTA file containing all reads that did not have a hit in the database for a given sample. Paired-end input reads are merged in this output. + - `_.krakenuniq.report.txt`: A Kraken2-style report that summarises the fraction abundance, taxonomic ID, number of k-mers, taxonomic path of all the hits, with an additional column for k-mer coverage, that allows for more accurate distinguishing between false-positive/true-positive hits. + - `_.krakenuniq.classified.txt`: An optional list of read IDs and the hits each read had against each database for a given sample.
-The main taxonomic classification file from KrakenUniq is the `*report.txt` file. This is an extension of the Kraken2 report with the additional k-mer coverage information that provides more information about the accuracy of hits. +The main taxonomic classification file from KrakenUniq is the `*.krakenuniq.report.txt` file. This is an extension of the Kraken2 report with the additional k-mer coverage information that provides more information about the accuracy of hits. -You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply `--krakenuniq_save_reads` and/or `--krakenuniq_save_readclassification` parameters to the pipeline. +You will only receive the `.fasta.gz` and `*.krakenuniq.classified.txt` file if you supply `--krakenuniq_save_reads` and/or `--krakenuniq_save_readclassification` parameters to the pipeline. :::info The output system of KrakenUniq can result in other `stdout` or `stderr` logging information being saved in the report file, therefore you must check your report files before downstream use! From c8cb7c16e857ae06a743c137588488ee6d918701 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Wed, 22 Nov 2023 16:36:54 +0100 Subject: [PATCH 3/5] docs: make a changelog entry --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3fb99ba..6fde8e29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased + +### `Fixed` + +- [#421](https://github.com/nf-core/taxprofiler/pull/421) Updated the krakenuniq/preloadedkrakenuniq module that contained a fix for saving the output reads (❤️ to @SannaAb for reporting, fix by @Midnighter) + ## v1.1.2 - Augmented Akita Patch [2023-10-27] ### `Added` From 8f7105332169408072a3c103ba22b2382842319f Mon Sep 17 00:00:00 2001 From: "Moritz E. 
Beber" Date: Thu, 23 Nov 2023 13:31:18 +0100 Subject: [PATCH 4/5] fix: adjust publishing pattern to allow FASTA --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 90ca6919..9c269997 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -500,7 +500,7 @@ process { publishDir = [ path: { "${params.outdir}/krakenuniq/${meta.db_name}/" }, mode: params.publish_dir_mode, - pattern: '*.{txt,fastq.gz}' + pattern: '*.{txt,fasta.gz}' ] } From bcb0539cb6974ffaea9ce69091b151022e28ed5a Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Thu, 23 Nov 2023 13:41:15 +0100 Subject: [PATCH 5/5] fix: adjust schema to correct FASTA output --- nextflow_schema.json | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 5897da32..23cad833 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -331,7 +331,7 @@ "type": "boolean", "fa_icon": "fas fa-save", "description": "Save reads from samples that went through the run-merging step", - "help_text": "Save the run- and library-concatenated reads of a given sample in FASTQ format.\n\n> \u26a0\ufe0f Only samples that went through the run-merging step of the pipeline will be stored in the resulting directory. \n\nIf you wish to save the files that go to the classification/profiling steps for samples that _did not_ go through run merging, you must supply the appropriate upstream `--save_` flag.\n\n" + "help_text": "Save the run- and library-concatenated reads of a given sample in FASTQ format.\n\n> ⚠️ Only samples that went through the run-merging step of the pipeline will be stored in the resulting directory. 
\n\nIf you wish to save the files that go to the classification/profiling steps for samples that _did not_ go through run merging, you must supply the appropriate upstream `--save_` flag.\n\n" } }, "fa_icon": "fas fa-clipboard-check" @@ -417,32 +417,32 @@ "run_krakenuniq": { "type": "boolean", "fa_icon": "fas fa-toggle-on", - "description": "Turn on profiling with KrakenUniq. Requires database to be present CSV file passed to --databases" + "description": "Turn on profiling with KrakenUniq. Requires one or more KrakenUniq databases to be present in the CSV file passed to --databases." }, "krakenuniq_save_reads": { "type": "boolean", "fa_icon": "fas fa-save", - "description": "Turn on saving of KrakenUniq-aligned reads", - "help_text": "Save reads that do and do not have a taxonomic classification in your output results directory in FASTQ format.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--classified-out` and `--unclassified-out`" + "description": "Turn on saving of KrakenUniq (un-)classified reads as FASTA.", + "help_text": "Save reads that do and do not have a taxonomic classification in your output results directory in FASTA format. Reads from paired-end input are merged.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--classified-out` and `--unclassified-out`" }, "krakenuniq_ram_chunk_size": { "type": "string", "default": "16G", - "description": "Specify how large to chunk database when loading into memory for KrakenUniq", + "description": "Specify how large to chunk the database when loading into memory for KrakenUniq.", "fa_icon": "fas fa-database", "help_text": "nf-core/taxprofiler utilises a 'low memory' option for KrakenUniq that can reduce the amount of RAM the process requires using the `--preloaded` option.\n\nA further extension to this option is that you can specify how large each chunk of the database should be that gets loaded into memory at any one time. 
You can specify the amount of RAM to chunk the database to with this parameter, and is particularly useful for people with limited computational resources.\n\nMore information about this parameter can be seen [here](https://github.com/fbreitwieser/krakenuniq/blob/master/README.md#new-release-v07).\n\n> Modifies KrakenUniq parameter: --preload-size\n\n" }, "krakenuniq_save_readclassifications": { "type": "boolean", "fa_icon": "fas fa-save", - "description": "Turn on saving of KrakenUniq per-read taxonomic assignment file", - "help_text": "Save a text file that contains a list of each read that had a taxonomic assignment, with information on specific taxonomic taxonomic assignment that that read recieved.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--output`" + "description": "Turn on saving of KrakenUniq per-read taxonomic assignment file.", + "help_text": "Save a text file that contains a list of each read that had a taxonomic assignment, with information on the specific taxonomic assignment that the read received.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--output`" }, "krakenuniq_batch_size": { "type": "integer", "default": 20, "fa_icon": "far fa-window-restore", - "description": "Specify the number of samples for each KrakenUniq run", + "description": "Specify the number of samples for each KrakenUniq run.", "help_text": "Specify the batch size for KrakenUniq. The reference database for KrakenUniq is loaded into memory once per nextflow process and then used to classify many samples. When you have many samples, a single KrakenUniq run can be rather slow. Alternatively, we can split up KrakenUniq runs for a 'batch' of samples, allowing a balance between shared use of database for multiple samples, but also faster parallelised KrakenUniq runs. This parameter determines for how many samples at a time." }, "run_bracken": {