diff --git a/CHANGELOG.md b/CHANGELOG.md index 0375bcb..6a0e6ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - [#50](https://github.com/Ferlab-Ste-Justine/Post-processing-Pipeline/pull/50) Use container tag 1.20 for splitMultiAllelics process - [#51](https://github.com/Ferlab-Ste-Justine/Post-processing-Pipeline/pull/51) Add missing ressources for exomiser process in configuration +- [#52](https://github.com/Ferlab-Ste-Justine/Post-processing-Pipeline/pull/52) Ensure .gvcf file extensions are supported in all scenarios ### `Known issues` - The nf-core modules that we are using have a potential performance flaw. Typically, the regex used to describe the output files also match the input files (ex: "*.vcf"), which can cause unnecessary file transfers. This has already proven to cause issues on fusion. One fix could be to transfer the whole modules to local to perform the small change necessary to fix this. diff --git a/README.md b/README.md index 0f2ce45..cb4265f 100644 --- a/README.md +++ b/README.md @@ -14,15 +14,16 @@ It performs joint genotyping, tags low-quality variants, and optionally annotates the final vcf data using vep and/or prioritize variant using exomiser. ### Summary: -1. Remove MNPs using bcftools -2. Normalize .gvcf -3. Combine .gvcf -4. [Joint-genotyping](https://gatk.broadinstitute.org/hc/en-us/articles/360037057852-GenotypeGVCFs) -5. Tag false positive variants with either: +1. Standardize input vcf files using bcftools view +2. Remove MNPs using bcftools +3. Normalize .gvcf +4. Combine .gvcf +5. [Joint-genotyping](https://gatk.broadinstitute.org/hc/en-us/articles/360037057852-GenotypeGVCFs) +6. Tag false positive variants with either: - For whole genome sequencing data: [Variant quality score recalibration (VQSR)](https://gatk.broadinstitute.org/hc/en-us/articles/360036510892-VariantRecalibrator) - For whole exome sequencing data: [Hard-Filtering](https://gatk.broadinstitute.org/hc/en-us/articles/360036733451-VariantFiltration) -6. Optionnally annotate variants with [Variant effect predictor (VEP)](https://useast.ensembl.org/info/docs/tools/vep/index.html) -7. Optionnally integrate phenotype data to annotate, filter and prioritise variants likely to be disease-causing with [exomiser](https://www.sanger.ac.uk/tool/exomiser/) +7. Optionnally annotate variants with [Variant effect predictor (VEP)](https://useast.ensembl.org/info/docs/tools/vep/index.html) +8. Optionnally integrate phenotype data to annotate, filter and prioritise variants likely to be disease-causing with [exomiser](https://www.sanger.ac.uk/tool/exomiser/) diff --git a/conf/modules.config b/conf/modules.config index 0e9cea6..611be0e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -26,6 +26,12 @@ process { publishDir = new_publish_dir() + withName: BCFTOOLS_VIEW { + container = 'staphb/bcftools:1.20' + ext.args = { '-Oz --write-index=tbi' } + ext.prefix = {meta.id + ".standardized.g"} + } + withName: BCFTOOLS_FILTER { container = 'staphb/bcftools:1.20' ext.args = {'-e \'strlen(REF)>1 & strlen(REF)==strlen(ALT) & TYPE="snp"\' -Oz --write-index=tbi'} diff --git a/docs/images/ferlab_workflow.png b/docs/images/ferlab_workflow.png index 2fcc3a5..2f4b6b7 100644 Binary files a/docs/images/ferlab_workflow.png and b/docs/images/ferlab_workflow.png differ diff --git a/docs/images/ferlab_workflow.svg b/docs/images/ferlab_workflow.svg index ede93e0..16ab198 100644 --- a/docs/images/ferlab_workflow.svg +++ b/docs/images/ferlab_workflow.svg @@ -7,8 +7,8 @@ viewBox="7 0 382.58899 139.24499" version="1.1" id="svg5" - inkscape:version="1.3.1 (9b9bdc1480, 2023-11-25, custom)" - sodipodi:docname="ferlab_dna_workflow.svg" + inkscape:version="1.2.2 (b0a8486, 2022-12-01)" + sodipodi:docname="ferlab_workflow_new.svg" inkscape:export-filename="sarek_subway.png" inkscape:export-xdpi="600" inkscape:export-ydpi="600" @@ -32,15 +32,15 @@ inkscape:pagecheckerboard="false" inkscape:document-units="mm" showgrid="false" - inkscape:zoom="0.7071068" - inkscape:cx="994.19211" - inkscape:cy="-151.32085" - inkscape:window-width="2494" - inkscape:window-height="1531" - inkscape:window-x="66" - inkscape:window-y="32" - inkscape:window-maximized="1" - inkscape:current-layer="layer4" + inkscape:zoom="0.39653515" + inkscape:cx="706.11646" + inkscape:cy="316.49149" + inkscape:window-width="1609" + inkscape:window-height="799" + inkscape:window-x="41" + inkscape:window-y="153" + inkscape:window-maximized="0" + inkscape:current-layer="layer1" width="211mm" fit-margin-top="0" fit-margin-left="0" @@ -64,13 +64,13 @@ x="0" y="0" width="382.58899" - height="139.24498" + height="139.245" id="page1" margin="500 500.00003 500 16.263456" bleed="0" - inkscape:export-filename="sarek_subway.png" - inkscape:export-xdpi="600" - inkscape:export-ydpi="600" />mappingbcftools normconcatenatesamtoolsgenotypeGVCFhardfilteringVEPexomiserexomiserVQSRVQSRWGSWGSWESWESpost-processingpost-processingnf-core/sarek core workflow + id="tspan3093">nf-core/sarek core workflow convertconvertbcftools view versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index}" : "" + + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/view/meta.yml b/modules/nf-core/bcftools/view/meta.yml new file mode 100644 index 0000000..aa7785f --- /dev/null +++ b/modules/nf-core/bcftools/view/meta.yml @@ -0,0 +1,88 @@ +name: bcftools_view +description: View, subset and filter VCF or BCF files by position and filtering expression. + Convert between VCF and BCF +keywords: + - variant calling + - view + - bcftools + - VCF +tools: + - view: + description: | + View, subset and filter VCF or BCF files by position and filtering expression. Convert between VCF and BCF + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be inspected. + e.g. 'file.vcf' + - index: + type: file + description: | + The tab index for the VCF file to be inspected. + e.g. 'file.tbi' + - - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. + e.g. 'file.vcf' + - - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon index files) + e.g. 'file.vcf' + - - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: VCF normalized output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" +maintainers: + - "@abhi18av" diff --git a/modules/nf-core/bcftools/view/tests/main.nf.test b/modules/nf-core/bcftools/view/tests/main.nf.test new file mode 100644 index 0000000..1e60c50 --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/main.nf.test @@ -0,0 +1,298 @@ +nextflow_process { + + name "Test Process BCFTOOLS_VIEW" + script "../main.nf" + process "BCFTOOLS_VIEW" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/view" + + test("sarscov2 - [vcf, tbi], [], [], []") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], vcf, tsv, []") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/view/tests/main.nf.test.snap b/modules/nf-core/bcftools/view/tests/main.nf.test.snap new file mode 100644 index 0000000..fec22e3 --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/main.nf.test.snap @@ -0,0 +1,333 @@ +{ + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ], + "csi": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T12:14:38.717458272" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + + ], + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.tbi" + ] + ], + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T12:13:44.760671384" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ], + "csi": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T16:06:21.669668533" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T12:14:53.026083914" + }, + "sarscov2 - [vcf, tbi], vcf, tsv, []": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out.vcf:md5,1bcbd0eff25d316ba915d06463aab17b" + ] + ], + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:14.663512924" + }, + "sarscov2 - [vcf, tbi], [], [], [] - stub": { + "content": [ + "out.vcf", + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:19.723448323" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi" + ] + ], + [ + + ], + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T08:24:36.358469315" + }, + "sarscov2 - [vcf, tbi], [], [], []": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out.vcf:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:09.588867653" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi" + ] + ], + [ + + ], + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T12:13:33.834986869" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/view/tests/nextflow.config b/modules/nf-core/bcftools/view/tests/nextflow.config new file mode 100644 index 0000000..932e3ba --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = '--no-version --output-type v' +} diff --git a/modules/nf-core/bcftools/view/tests/tags.yml b/modules/nf-core/bcftools/view/tests/tags.yml new file mode 100644 index 0000000..43b1f0a --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/view: + - "modules/nf-core/bcftools/view/**" diff --git a/modules/nf-core/bcftools/view/tests/vcf_gz_index.config b/modules/nf-core/bcftools/view/tests/vcf_gz_index.config new file mode 100644 index 0000000..7dd696e --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index --no-version" +} diff --git a/modules/nf-core/bcftools/view/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/view/tests/vcf_gz_index_csi.config new file mode 100644 index 0000000..aebffb6 --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/vcf_gz_index_csi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=csi --no-version" +} diff --git a/modules/nf-core/bcftools/view/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/view/tests/vcf_gz_index_tbi.config new file mode 100644 index 0000000..b192ae7 --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/vcf_gz_index_tbi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=tbi --no-version" +} diff --git a/nextflow.config b/nextflow.config index d8aaa3b..c8cde79 100644 --- a/nextflow.config +++ b/nextflow.config @@ -232,7 +232,7 @@ process { container = 'staphb/htslib:1.19' } //see conf/base.config for the performance options of defined by nf-core standards - withName: 'BCFTOOLS_FILTER|BCFTOOLS_NORM' { + withName: 'BCFTOOLS_FILTER|BCFTOOLS_NORM|BCFTOOLS_VIEW' { errorStrategy = 'retry' maxRetries = 1 cpus = { check_max( 2 * task.attempt, 'cpus' ) } diff --git a/workflows/postprocessing.nf b/workflows/postprocessing.nf index 5c985ba..2579cb6 100644 --- a/workflows/postprocessing.nf +++ b/workflows/postprocessing.nf @@ -10,11 +10,11 @@ include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pi include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { EXCLUDE_MNPS } from "../subworkflows/local/exclude_mnps" include { VQSR } from "../subworkflows/local/vqsr" +include { BCFTOOLS_VIEW } from '../modules/nf-core/bcftools/view/main' include { EXOMISER } from '../modules/local/exomiser' include { splitMultiAllelics } from '../modules/local/split_multi_allelics' include { ENSEMBLVEP_VEP } from '../modules/nf-core/ensemblvep/vep/main' include { tabix as vep_tabix } from '../modules/local/tabix' -include { tabix as initial_tabix } from '../modules/local/tabix' include { COMBINEGVCFS } from '../modules/local/combine_gvcfs' include { GATK4_GENOTYPEGVCFS } from '../modules/nf-core/gatk4/genotypegvcfs' include { GATK4_VARIANTFILTRATION} from '../modules/nf-core/gatk4/variantfiltration' @@ -137,16 +137,28 @@ process writemeta{ """ } -// We assume that the input gvcf files are indexed -def replicate_excludemnps_output_format(input_channel) { - def with_tbi = input_channel.filter{meta, vcf -> file(vcf + ".tbi").exists()} - .map{meta, vcf -> [meta, [file(vcf), file(vcf + ".tbi")]]} - def tbi_input = input_channel.filter{meta, vcf -> !file(vcf + ".tbi").exists()} - def tbi_output = initial_tabix(tbi_input) - def with_generated_tbi = tbi_input.join(tbi_output).map{meta, vcf, tbi -> [meta, [vcf, tbi]]} +def handle_mnps(input_channel, do_exclude_mnps) { + if(!do_exclude_mnps) { + return input_channel.map{meta, vcf, tbi -> [meta, [vcf, tbi]]} + } + def ch_input_excludemnps = input_channel.map{meta, vcf, tbi -> [meta, vcf]} + return EXCLUDE_MNPS(ch_input_excludemnps).ch_output_excludemnps +} - return with_tbi.concat(with_generated_tbi) + +/* + Deal with variations in input file formats, extensions, and the presence or absence of index files. + input: [meta, vcf] + output: [meta, vcf, tbi] +*/ +def standardize_input_vcf_files(input_channel) { + def view_input = input_channel.map{meta, vcf -> + def tbi = file(vcf + ".tbi") + [meta, vcf, tbi.exists() ? tbi: []] + } + def view_output = BCFTOOLS_VIEW(view_input, [], [], []) + return view_output.vcf.join(view_output.tbi) } workflow POSTPROCESSING { @@ -176,11 +188,12 @@ workflow POSTPROCESSING { writemeta() - def ch_output_from_excludemnps = params.exclude_mnps ? - EXCLUDE_MNPS(ch_samplesheet).ch_output_excludemnps : - replicate_excludemnps_output_format(ch_samplesheet) + + def ch_samplesheet_standard = standardize_input_vcf_files(ch_samplesheet) + + def ch_output_from_handle_mnps = handle_mnps(ch_samplesheet_standard, params.exclude_mnps) - def grouped_by_family = ch_output_from_excludemnps + def grouped_by_family = ch_output_from_handle_mnps //Create groupkey for the grouptuple and separate the vcf (file[0]) and the index (files[1]) .map{meta, files -> tuple(groupKey(meta.familyId, meta.sampleSize),meta,files[0],files[1])} .groupTuple()