From ee09756a1b27ba001c17d4f3430ee25055fd5ab9 Mon Sep 17 00:00:00 2001 From: riasc Date: Fri, 21 Jun 2024 21:02:20 -0500 Subject: [PATCH] separated samtools, bcftools and realign envs to avoid conflicts --- CHANGELOG.md | 6 +++ workflow/envs/bcftools.yml | 6 +++ workflow/envs/realign.yml | 7 ++++ workflow/envs/samtools.yml | 6 +-- workflow/rules/align.smk | 4 +- workflow/rules/all.smk | 0 workflow/rules/altsplicing.smk | 4 +- workflow/rules/custom.smk | 2 +- workflow/rules/exitron.smk | 4 +- workflow/rules/germline.smk | 16 ++++---- workflow/rules/indel.smk | 20 +++++----- workflow/rules/postproc.smk | 71 ---------------------------------- 12 files changed, 47 insertions(+), 99 deletions(-) create mode 100644 workflow/envs/bcftools.yml create mode 100644 workflow/envs/realign.yml delete mode 100644 workflow/rules/all.smk delete mode 100644 workflow/rules/postproc.smk diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ff3411..b313e29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Prioritization of neoantigens is now done separately for each variant type (speeds up the process) - NMD information (e.g., escape rule,...) is now also calculated for all variants +## [0.2.7] - 2024-06-23 + +### Fix + +- Separated samtools, bcftools and realign environments to avoid conflicts + ## [0.2.6] - 2024-06-20 ### Fix diff --git a/workflow/envs/bcftools.yml b/workflow/envs/bcftools.yml new file mode 100644 index 0000000..3129665 --- /dev/null +++ b/workflow/envs/bcftools.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - nodefaults +dependencies: + - bcftools=1.20 diff --git a/workflow/envs/realign.yml b/workflow/envs/realign.yml new file mode 100644 index 0000000..d53b369 --- /dev/null +++ b/workflow/envs/realign.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - nodefaults +dependencies: + - bwa=0.7.18 + - samtools=1.20 diff --git a/workflow/envs/samtools.yml b/workflow/envs/samtools.yml index 556860b..906f443 100644 --- a/workflow/envs/samtools.yml +++ b/workflow/envs/samtools.yml @@ -1,6 +1,6 @@ channels: + - conda-forge - bioconda + - nodefaults dependencies: - - samtools=1.9 - - bcftools=1.9 - + - samtools=1.14 diff --git a/workflow/rules/align.smk b/workflow/rules/align.smk index 30d1dc9..bf29421 100644 --- a/workflow/rules/align.smk +++ b/workflow/rules/align.smk @@ -201,7 +201,7 @@ rule realign: output: bam="results/{sample}/{seqtype}/align/{group}_final_BWA.bam", conda: - "../envs/basic.yml" + "../envs/realign.yml" log: "logs/{sample}/realign/{seqtype}_{group}.log" threads: config['threads'] @@ -225,7 +225,7 @@ if config['data']['dnaseq_filetype'] in ['.fq','.fastq']: log: "logs/{sample}/bwa_align/dnaseq_{group}.log" conda: - "../envs/basic.yml" + "../envs/realign.yml" params: extra="" threads: config['threads'] diff --git a/workflow/rules/all.smk b/workflow/rules/all.smk deleted file mode 100644 index e69de29..0000000 diff --git a/workflow/rules/altsplicing.smk b/workflow/rules/altsplicing.smk index 6761fe3..70e84f1 100644 --- a/workflow/rules/altsplicing.smk +++ b/workflow/rules/altsplicing.smk @@ -54,7 +54,7 @@ rule sort_altsplicing: log: "logs/{sample}/spladder/{group}_sort.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools sort {input} -o - | bcftools view -O z -o {output} > {log} 2>&1 @@ -70,7 +70,7 @@ rule combine_altsplicing: log: "logs/{sample}/exitrons/combine_exitrons.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools concat --naive -O z {input} -o - | bcftools sort -O z -o {output} > {log} 2>&1 diff --git a/workflow/rules/custom.smk b/workflow/rules/custom.smk index 8c4a245..2253e2d 100644 --- a/workflow/rules/custom.smk +++ b/workflow/rules/custom.smk @@ -28,7 +28,7 @@ rule sort_custom_variants: log: "logs/{sample}/custom/sort_custom_variants.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools sort {input} -o - | bcftools view -O z -o {output} > {log} 2>&1 diff --git a/workflow/rules/exitron.smk b/workflow/rules/exitron.smk index 511d7b1..ed6a049 100644 --- a/workflow/rules/exitron.smk +++ b/workflow/rules/exitron.smk @@ -116,7 +116,7 @@ rule sort_exitron: log: "logs/exitron_sort_{sample}_{group}.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools sort {input} -o - | bcftools view -O z -o {output} > {log} 2>&1 @@ -132,7 +132,7 @@ rule combine_exitrons: log: "logs/{sample}/exitrons/combine_exitrons.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools concat --naive -O z {input} -o - | bcftools sort -O z -o {output} > {log} 2>&1 diff --git a/workflow/rules/germline.smk b/workflow/rules/germline.smk index 0cd912f..6759f44 100644 --- a/workflow/rules/germline.smk +++ b/workflow/rules/germline.smk @@ -97,7 +97,7 @@ rule sort_variants_htc_first_round: log: "logs/{sample}/gatk/haplotypecaller/{seqtype}_{group}_1rd_{chr}_sort.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools sort {input} -o - | bcftools view -O z -o {output} > {log} 2>&1 @@ -113,7 +113,7 @@ rule index_variants_htc_first_round: log: "logs/{sample}/gatk/haplotypecaller/{seqtype}_{group}_1rd_{chr}_index.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools index -t {input} > {log} 2>&1 @@ -130,7 +130,7 @@ rule merge_variants_htc_first_round: log: "logs/{sample}/gatk/haplotypecaller/{seqtype}_{group}_1rd_merge.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools concat -O z -a {input.vcf} -o {output} > {log} 2>&1 @@ -146,7 +146,7 @@ rule index_merged_variants_htc_first_round: log: "logs/{sample}/gatk/haplotypecaller/{seqtype}_{group}_1rd_index.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools index -t {input} > {log} 2>&1 @@ -346,7 +346,7 @@ rule sort_variants_htc_final_round: log: "logs/{sample}/indel/gatk/haplotypecaller/sort_final_{seqtype}_{group}_{chr}.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools sort {input} -o - | bcftools view -O z -o {output} > {log} 2>&1 @@ -362,7 +362,7 @@ rule index_variants_htc_final_round: log: "logs/{sample}/indel/gatk/haplotypecaller/index_final_{seqtype}_{group}_{chr}.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools index -t {input} > {log} 2>&1 @@ -379,7 +379,7 @@ rule merge_variants_htc_final_round: log: "logs/{sample}/indel/gatk/haplotypecaller/merge_final_{seqtype}_{group}.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools concat -O z -a {input.vcf} -o {output} > {log} 2>&1 @@ -395,7 +395,7 @@ rule index_merged_variants_htc_final_round: log: "logs/{sample}/indel/gatk/haplotypecaller/index_final_{seqtype}_{group}.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools index -t {input} > {log} 2>&1 diff --git a/workflow/rules/indel.smk b/workflow/rules/indel.smk index 7f20fb4..c857070 100644 --- a/workflow/rules/indel.smk +++ b/workflow/rules/indel.smk @@ -110,7 +110,7 @@ rule longindel_sort_and_compress: log: "logs/{sample}/transindel/{seqtype}_{group}_longindel_sort.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools sort {input} -o - | bcftools view -O z -o {output} > {log} 2>&1 @@ -126,7 +126,7 @@ rule combine_longindels: log: "logs/{sample}/transindel/combine_longindels.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools concat --naive -O z {input} -o - | bcftools sort -O z -o {output} > {log} 2>&1 @@ -222,7 +222,7 @@ rule sort_short_indels_m2: log: "logs/{sample}/gatk/mutect2/sort_{seqtype}_{group}_{chr}.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools sort {input} -o - | bcftools view -O z -o {output} > {log} 2>&1 @@ -238,7 +238,7 @@ rule index_short_indels_m2: log: "logs/{sample}/gatk/mutect2/index_{seqtype}_{group}_{chr}.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools index -t {input} > {log} 2>&1 @@ -255,7 +255,7 @@ rule merge_short_indels_m2: log: "logs/{sample}/gatk/mutect2/merge_{seqtype}_{group}.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools concat -O z -a {input.vcf} -o {output} > {log} 2>&1 @@ -273,7 +273,7 @@ rule index_merged_short_indels_m2: log: "logs/{sample}/gatk/mutect2/index_merged_{seqtype}_{group}.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools index -t {input} > {log} 2>&1 @@ -328,7 +328,7 @@ rule sort_aug_short_indels_m2: log: "logs/{sample}/transindel/{seqtype}_{group}_shortindel_sort.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools sort {input} -o - | bcftools view -O z -o {output} > {log} 2>&1 @@ -344,7 +344,7 @@ rule combine_aug_short_indels_m2: log: "logs/{sample}/transindel/combine_longindels.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools concat --naive -O z {input} -o - | bcftools sort -O z -o {output} > {log} 2>&1 @@ -398,7 +398,7 @@ rule sort_somatic_SNVs_m2: log: "logs/{sample}/transindel/{seqtype}_{group}_SNVs_sort.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools sort {input} -o - | bcftools view -O z -o {output} > {log} 2>&1 @@ -414,7 +414,7 @@ rule combine_somatic_SNVs_m2: log: "logs/{sample}/transindel/combine_somatic_SNVs.log" conda: - "../envs/samtools.yml" + "../envs/bcftools.yml" shell: """ bcftools concat --naive -O z {input} -o - | bcftools sort -O z -o {output} > {log} 2>&1 diff --git a/workflow/rules/postproc.smk b/workflow/rules/postproc.smk deleted file mode 100644 index d1063be..0000000 --- a/workflow/rules/postproc.smk +++ /dev/null @@ -1,71 +0,0 @@ -rule postproc: - input: - "results/{sample}/rnaseq/align/{group}_aligned.bam" - output: - "results/{sample}/rnaseq/align/{group}_ready.bam" - conda: - "../envs/samtools.yml" - log: - "logs/{sample}/postproc/rnaseq_{group}.log" - threads: 6 - shell: - """ - samtools index {input} - samtools view -bh -F 4 --min-MQ {config[mapq]} {input} -o - \ - | samtools sort -n -@ {threads} -m1g -O bam - -o - \ - | samtools fixmate -pcmu -O bam -@ {threads} - - \ - | samtools sort -@ {threads} -m1g -O bam - -o - \ - | samtools markdup -r -@ {threads} - {output} > {log} 2>&1 - samtools index {output} - """ - -rule postproc_bam_index: - input: - "results/{sample}/rnaseq/align/{group}_ready.bam" - output: - "results/{sample}/rnaseq/align/{group}_ready.bam.bai" - conda: - "../envs/samtools.yml" - log: - "logs/{sample}/postproc/index/rnaseq_{group}.log" - shell: - """ - samtools index {input} > {log} 2>&1 - """ - - -## retrieve readgroups from bam file -rule get_readgroups: - input: - get_readgroups_input - output: - "results/{sample}/{seqtype}/reads/{group}_readgroups.txt" - conda: - "../envs/basic.yml" - log: - "logs/{sample}/get_readgroups/{seqtype}_{group}.log" - shell: - """ - python workflow/scripts/get_readgroups.py '{input}' \ - {output} > {log} 2>&1 - """ - -rule realign: - input: - bam="results/{sample}/rnaseq/align/{group}_ready.bam", - rg="results/{sample}/rnaseq/reads/{group}_readgroups.txt" - output: - "results/{sample}/rnaseq/align/{group}_realigned.bam" - conda: - "../envs/basic.yml" - log: - "logs/{sample}/realign/rnaseq_{group}.log" - threads: config['threads'] - shell: - """ - samtools collate -Oun128 {input.bam} \ - | samtools fastq -OT RG -@ {threads} - \ - | bwa mem -pt{threads} -CH <(cat {input.rg}) resources/refs/bwa/genome - \ - | samtools sort -@6 -m1g - -o {output} > {log} 2>&1 - samtools index {output} - """