From 47410dd99b03c91c8a302513a9d5075558d5dd4c Mon Sep 17 00:00:00 2001
From: riasc <richard.schaefer@zoho.com>
Date: Fri, 21 Jun 2024 00:48:12 -0500
Subject: [PATCH] added reference genome index on germline indel calling (which
 is required when only indel calling has been activated) & removed -C from BWA
 mem call (on DNAseq data), which causes issues on Illumina identifiers

---
 CHANGELOG.md              | 8 ++++++++
 workflow/rules/align.smk  | 7 +++----
 workflow/rules/common.smk | 1 -
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3f9196c..5ff3411 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Prioritization of neoantigens is now done separately for each variant type (speeds up the process)
 - NMD information (e.g., escape rule,...) is now also calculated for all variants
 
+## [0.2.6] - 2024-06-20
+
+### Fix 
+
+- Added routines to catch errors when rnaseq data is not provided but exitron/alternative splicing calling is activated
+- Added reference genome index as input to germline indel calling (necessary when only indel calling is activated)
+- Removed -C from BWA mem call (on DNAseq data) to avoid errors on Illumina identifiers
+
 ## [0.2.5] - 2024-06-19
 
 ### Fix 
diff --git a/workflow/rules/align.smk b/workflow/rules/align.smk
index 9ec2699..30d1dc9 100644
--- a/workflow/rules/align.smk
+++ b/workflow/rules/align.smk
@@ -231,10 +231,9 @@ if config['data']['dnaseq_filetype'] in ['.fq','.fastq']:
     threads: config['threads']
     shell:
       """
-        bwa mem -t{threads} -C resources/refs/bwa/genome {input.reads} \
-        | samtools addreplacerg -r ID:{wildcards.group} -r SM:{wildcards.sample} \
-        -r LB:{wildcards.sample} -r PL:ILLUMINA -r PU:{wildcards.group} - - \
-        | samtools sort -@ 6 -n -m1g - -o {output} > {log} 2>&1
+        bwa mem -t{threads} resources/refs/bwa/genome \
+            -R '@RG\\tID:{wildcards.group}\\tSM:{wildcards.sample}\\tLB:{wildcards.sample}\\tPL:ILLUMINA' \
+            {input.reads} | samtools sort -@ 6 -n -m1g - -o {output} > {log} 2>&1
       """
 
   rule dnaseq_postproc:
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 9a2a295..b6d4402 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -124,7 +124,6 @@ def all_identical(l):
 
 # load up the config
 config['data'] = data_structure(config['data'])
-print(config)
 
 ########### PREPROCESSING ##########
 def get_raw_reads(wildcards):