From e407afbcb9a1ac81ec7a88933e57420c61682fb9 Mon Sep 17 00:00:00 2001
From: Miguel Brown <miguel.a.brown@gmail.com>
Date: Wed, 17 Jul 2024 14:55:13 -0400
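Subject: [PATCH 1/2] :pencil: update snv hotspot refs

Point the recommended `protein_snv_hotspots` input at
kfdrc_protein_snv_cancer_hotspots_20240718.txt instead of
protein_snv_cancer_hotspots_v2.ENS105_liftover.tsv in README.md, in the
kfdrc_tumor_only_dna_wf.cwl doc block, and in the sbg:suggestedValue
entries of both workflows.
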
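---
NOTE(review): in workflows/kfdrc_mutect2_production_wf.cwl only the
suggested `name` changes; the `path` on the preceding context line is still
645919782fe81458768c552c, whereas the kfdrc_tumor_only_dna_wf.cwl hunk
updates both `path` and `name`. Please confirm whether the production
workflow also needs a new file ID for the 20240718 hotspot file.
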
 README.md                                 | 2 +-
 workflows/kfdrc_mutect2_production_wf.cwl | 2 +-
 workflows/kfdrc_tumor_only_dna_wf.cwl     | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 00d95ea..1d1cee0 100644
--- a/README.md
+++ b/README.md
@@ -61,7 +61,7 @@ Most inputs have recommended values that should auto import both files and param
  - `b_allele`: dbSNP_v153_ucsc-compatible.converted.vt.decomp.norm.common_snps.vcf.gz. dbSNP v153 was obtained from [the ftp site](https://ftp.ncbi.nih.gov/snp/archive/b153/VCF/GCF_000001405.38.gz). Then, using a awk/perl/bash script of your choice, convert NCBI accession names to UCSC-style chromosome names using [this table](https://hgdownload.soe.ucsc.edu/hubs/GCF/000/001/405/GCF_000001405.39/GCF_000001405.39.chromAlias.txt). Next, run the [VCF normalization tool](https://github.com/kids-first/kf-annotation-tools/blob/master/tools/normalize_vcf.cwl), then use bcftools to extract only common snps: `bcftools view --include INFO/COMMON=1 --types snps dbSNP_v153_ucsc-compatible.converted.vt.decomp.norm.vcf.gz -O z -o dbSNP_v153_ucsc-compatible.converted.vt.decomp.norm.common_snps.vcf.gz`. Lastly, use tabix to index the resultant file.
  - `vep_cache`: homo_sapiens_merged_vep_105_indexed_GRCh38.tar.gz
  - `genomic_hotspots`: tert.bed # bed file with TERT gene promoter region
- - `protein_snv_hotspots`: protein_snv_cancer_hotspots_v2.ENS105_liftover.tsv
+ - `protein_snv_hotspots`: kfdrc_protein_snv_cancer_hotspots_20240718.txt
  - `protein_indel_hotspots`: protein_indel_cancer_hotspots_v2.ENS105_liftover.tsv
  - `echtvar_anno_zips`: gnomad.v3.1.1.custom.echtvar.zip
 ### Necessary for user to define:
diff --git a/workflows/kfdrc_mutect2_production_wf.cwl b/workflows/kfdrc_mutect2_production_wf.cwl
index fae986f..b775b60 100644
--- a/workflows/kfdrc_mutect2_production_wf.cwl
+++ b/workflows/kfdrc_mutect2_production_wf.cwl
@@ -202,7 +202,7 @@ inputs:
   protein_snv_hotspots: {type: 'File[]?', doc: "Column-name-containing, tab-delimited\
       \ file(s) containing protein names and amino acid positions corresponding to\
       \ hotspots", "sbg:suggestedValue": [{class: File, path: 645919782fe81458768c552c,
-        name: protein_snv_cancer_hotspots_v2.ENS105_liftover.tsv}]}
+        name: kfdrc_protein_snv_cancer_hotspots_20240718.txt}]}
   protein_indel_hotspots: {type: 'File[]?', doc: "Column-name-containing, tab-delimited\
       \ file(s) containing protein names and amino acid position ranges corresponding\
       \ to hotspots", "sbg:suggestedValue": [{class: File, path: 645919782fe81458768c552d,
diff --git a/workflows/kfdrc_tumor_only_dna_wf.cwl b/workflows/kfdrc_tumor_only_dna_wf.cwl
index 47ba370..45bae83 100644
--- a/workflows/kfdrc_tumor_only_dna_wf.cwl
+++ b/workflows/kfdrc_tumor_only_dna_wf.cwl
@@ -65,7 +65,7 @@ doc: |
    - `b_allele`: dbSNP_v153_ucsc-compatible.converted.vt.decomp.norm.common_snps.vcf.gz. dbSNP v153 was obtained from [the ftp site](https://ftp.ncbi.nih.gov/snp/archive/b153/VCF/GCF_000001405.38.gz). Then, using a awk/perl/bash script of your choice, convert NCBI accession names to UCSC-style chromosome names using [this table](https://hgdownload.soe.ucsc.edu/hubs/GCF/000/001/405/GCF_000001405.39/GCF_000001405.39.chromAlias.txt). Next, run the [VCF normalization tool](https://github.com/kids-first/kf-annotation-tools/blob/master/tools/normalize_vcf.cwl), then use bcftools to extract only common snps: `bcftools view --include INFO/COMMON=1 --types snps dbSNP_v153_ucsc-compatible.converted.vt.decomp.norm.vcf.gz -O z -o dbSNP_v153_ucsc-compatible.converted.vt.decomp.norm.common_snps.vcf.gz`. Lastly, use tabix to index the resultant file.
    - `vep_cache`: homo_sapiens_merged_vep_105_indexed_GRCh38.tar.gz
    - `genomic_hotspots`: tert.bed # bed file with TERT gene promoter region
-   - `protein_snv_hotspots`: protein_snv_cancer_hotspots_v2.ENS105_liftover.tsv
+   - `protein_snv_hotspots`: kfdrc_protein_snv_cancer_hotspots_20240718.txt
    - `protein_indel_hotspots`: protein_indel_cancer_hotspots_v2.ENS105_liftover.tsv
    - `echtvar_anno_zips`: gnomad.v3.1.1.custom.echtvar.zip
   ### Necessary for user to define:
@@ -204,7 +204,7 @@ inputs:
   genomic_hotspots: {type: 'File[]?', doc: "Tab-delimited BED formatted file(s) containing hg38 genomic positions corresponding to
       hotspots", "sbg:suggestedValue": [{class: File, path: 607713829360f10e3982a423, name: tert.bed}]}
   protein_snv_hotspots: {type: 'File[]?', doc: "Column-name-containing, tab-delimited file(s) containing protein names and amino acid
-      positions corresponding to hotspots", "sbg:suggestedValue": [{class: File, path: 663d2bcc27374715fccd8c6a, name: protein_snv_cancer_hotspots_v2.ENS105_liftover.tsv}]}
+      positions corresponding to hotspots", "sbg:suggestedValue": [{class: File, path: 66980e845a58091951d53984, name: kfdrc_protein_snv_cancer_hotspots_20240718.txt}]}
   protein_indel_hotspots: {type: 'File[]?', doc: "Column-name-containing, tab-delimited file(s) containing protein names and amino
       acid position ranges corresponding to hotspots", "sbg:suggestedValue": [{class: File, path: 663d2bcc27374715fccd8c6f, name: protein_indel_cancer_hotspots_v2.ENS105_liftover.tsv}]}
   mutect2_retain_info: {type: 'string?', doc: "csv string with INFO fields that you want to keep", default: "gnomad_3_1_1_AC,gnomad_3_1_1_AN,gnomad_3_1_1_AF,gnomad_3_1_1_nhomalt,gnomad_3_1_1_AC_popmax,gnomad_3_1_1_AN_popmax,gnomad_3_1_1_AF_popmax,gnomad_3_1_1_nhomalt_popmax,gnomad_3_1_1_AC_controls_and_biobanks,gnomad_3_1_1_AN_controls_and_biobanks,gnomad_3_1_1_AF_controls_and_biobanks,gnomad_3_1_1_AF_non_cancer,gnomad_3_1_1_primate_ai_score,gnomad_3_1_1_splice_ai_consequence,gnomad_3_1_1_AF_non_cancer_afr,gnomad_3_1_1_AF_non_cancer_ami,gnomad_3_1_1_AF_non_cancer_asj,gnomad_3_1_1_AF_non_cancer_eas,gnomad_3_1_1_AF_non_cancer_fin,gnomad_3_1_1_AF_non_cancer_mid,gnomad_3_1_1_AF_non_cancer_nfe,gnomad_3_1_1_AF_non_cancer_oth,gnomad_3_1_1_AF_non_cancer_raw,gnomad_3_1_1_AF_non_cancer_sas,gnomad_3_1_1_AF_non_cancer_amr,gnomad_3_1_1_AF_non_cancer_popmax,gnomad_3_1_1_AF_non_cancer_all_popmax,gnomad_3_1_1_FILTER,MQ,MQ0,QSI,HotSpotAllele"}

From 1020a39587f51ef0fde3eda9387ec328c32ef9a3 Mon Sep 17 00:00:00 2001
From: migbro <migbro@users.noreply.github.com>
Date: Thu, 18 Jul 2024 14:10:11 +0000
Subject: [PATCH 2/2] :robot: update annotation submodule

---
 kf-annotation-tools | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kf-annotation-tools b/kf-annotation-tools
index 5828407..934ea4c 160000
--- a/kf-annotation-tools
+++ b/kf-annotation-tools
@@ -1 +1 @@
-Subproject commit 5828407b2e9c1f6a4a4490a1b5897da4a2669788
+Subproject commit 934ea4c2903906a904fd9602356f1dd16543cb42