From 47410dd99b03c91c8a302513a9d5075558d5dd4c Mon Sep 17 00:00:00 2001 From: riasc Date: Fri, 21 Jun 2024 00:48:12 -0500 Subject: [PATCH] added reference genome index on germline indel calling (which is required when only indel calling has been activated) & removed -C from BWA mem call (on DNAseq data), which causes issues on Illumina identifiers --- CHANGELOG.md | 8 ++++++++ workflow/rules/align.smk | 7 +++---- workflow/rules/common.smk | 1 - 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f9196c..5ff3411 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Prioritization of neoantigens is now done separately for each variant type (speeds up the process) - NMD information (e.g., escape rule,...) is now also calculated for all variants +## [0.2.6] - 2024-06-20 + +### Fix + +- Added routines to catch errors when rnaseq data is not provided but exitron/alternative splicing calling is activated +- Added reference genome index as input to germline indel calling (necessary when only indel calling is activated) +- removed -C from BWA mem call (on DNAseq data) to avoid error on Illumina identifiers + ## [0.2.5] - 2024-06-19 ### Fix diff --git a/workflow/rules/align.smk b/workflow/rules/align.smk index 9ec2699..30d1dc9 100644 --- a/workflow/rules/align.smk +++ b/workflow/rules/align.smk @@ -231,10 +231,9 @@ if config['data']['dnaseq_filetype'] in ['.fq','.fastq']: threads: config['threads'] shell: """ - bwa mem -t{threads} -C resources/refs/bwa/genome {input.reads} \ - | samtools addreplacerg -r ID:{wildcards.group} -r SM:{wildcards.sample} \ - -r LB:{wildcards.sample} -r PL:ILLUMINA -r PU:{wildcards.group} - - \ - | samtools sort -@ 6 -n -m1g - -o {output} > {log} 2>&1 + bwa mem -t{threads} resources/refs/bwa/genome \ + -R '@RG\\tID:{wildcards.group}\\tSM:{wildcards.sample}\\tLB:{wildcards.sample}\\tPL:ILLUMINA' \ + {input.reads} | 
samtools sort -@ 6 -n -m1g - -o {output} > {log} 2>&1 """ rule dnaseq_postproc: diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 9a2a295..b6d4402 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -124,7 +124,6 @@ def all_identical(l): # load up the config config['data'] = data_structure(config['data']) -print(config) ########### PREPROCESSING ########## def get_raw_reads(wildcards):