
Commit

Snakefmt annotations.snakefile
endast committed Oct 19, 2023
1 parent c2b0226 commit 51c7950
Showing 1 changed file with 80 additions and 60 deletions.
140 changes: 80 additions & 60 deletions pipelines/annotations.snakefile
@@ -4,6 +4,7 @@ from pathlib import Path

configfile: "config/deeprvat_annotation_config.yaml"


# init general

species = config.get("species") or "homo_sapiens"
@@ -26,8 +27,12 @@ saved_deepripe_models_path = (
merge_nthreads = int(config.get("merge_nthreads") or 64)

# If modules are used we load them here
load_bfc = " ".join([config["bcftools_load_cmd"], "&&" if config["bcftools_load_cmd"] else ""])
load_hts = " ".join([config["htslib_load_cmd"], "&&" if config["htslib_load_cmd"] else ""])
load_bfc = " ".join(
[config["bcftools_load_cmd"], "&&" if config["bcftools_load_cmd"] else ""]
)
load_hts = " ".join(
[config["htslib_load_cmd"], "&&" if config["htslib_load_cmd"] else ""]
)
load_perl = " ".join([config["perl_load_cmd"], "&&" if config["perl_load_cmd"] else ""])
load_vep = " ".join([config["vep_load_cmd"], "&&" if config["vep_load_cmd"] else ""])

@@ -53,7 +58,7 @@ vep_cache_dir = Path(config.get("vep_cache_dir")) or vep_source_dir / "cache"
vep_plugin_dir = Path(config.get("vep_plugin_dir")) or vep_source_dir / "Plugin"
vep_input_format = config.get("vep_input_format") or "vcf"
vep_nfork = int(config.get("vep_nfork") or 5)
af_mode= config.get("af_mode") or "af"
af_mode = config.get("af_mode") or "af"

# init plugIns
spliceAI_snv_file = config["spliceAI_snv_file"]
@@ -72,11 +77,11 @@ pvcf_blocks_df = pd.read_csv(
absplice_repo_dir = Path(config["absplice_repo_dir"])
n_cores_absplice = int(config.get("n_cores_absplice") or 4)
ncores_merge_absplice = int(config.get("n_cores_merge_absplice") or 64)
#init deepripe
# init deepripe
n_jobs_deepripe = int(config.get("n_jobs_deepripe") or 8)
# init kipoi-veff2
kipoi_repo_dir = Path(config["kipoiveff_repo_dir"])
ncores_addis = int(config.get("n_jobs_addids") or 32)
ncores_addis = int(config.get("n_jobs_addids") or 32)
# Filter out which chromosomes to work with
pvcf_blocks_df = pvcf_blocks_df[
pvcf_blocks_df["Chromosome"].isin([str(c) for c in included_chromosomes])
@@ -103,16 +108,19 @@ rule all:
rule aggregate_and_merge_absplice:
input:
abscore_files=expand(
[anno_tmp_dir / "absplice" / (source_variant_file_pattern + "_AbSplice_DNA.csv")],
[
anno_tmp_dir
/ "absplice"
/ (source_variant_file_pattern + "_AbSplice_DNA.csv")
],
zip,
chr=chromosomes,
block=block,
),
current_annotation_file=anno_dir / "vep_deepripe_deepsea.parquet"
current_annotation_file=anno_dir / "vep_deepripe_deepsea.parquet",
output:
annotations=anno_dir / "current_annotations_absplice.parquet",
scores=anno_tmp_dir / "abSplice_score_file.parquet"

scores=anno_tmp_dir / "abSplice_score_file.parquet",
shell:
" ".join(
[
@@ -123,18 +131,17 @@ rule aggregate_and_merge_absplice:
str(anno_tmp_dir / "absplice"),
"{output.annotations}",
"{output.scores}",
f"{ncores_merge_absplice}"
])


f"{ncores_merge_absplice}",
]
)


rule merge_deepsea_pcas:
input:
annotations=anno_dir / "vep_deepripe.parquet",
deepsea_pcas=anno_dir / "deepSea_pca" / "deepsea_pca.parquet",
output:
anno_dir / "vep_deepripe_deepsea.parquet"
anno_dir / "vep_deepripe_deepsea.parquet",
shell:
" ".join(
[
@@ -147,40 +154,55 @@ rule merge_deepsea_pcas:
]
)


rule concat_annotations:
input:
pvcf = metadata_dir / config['pvcf_blocks_file'],
anno_dir = anno_dir,
vcf_files=
expand([anno_dir / f"{source_variant_file_pattern}_merged.parquet"],
pvcf=metadata_dir / config["pvcf_blocks_file"],
anno_dir=anno_dir,
vcf_files=expand(
[anno_dir / f"{source_variant_file_pattern}_merged.parquet"],
zip,
chr=chromosomes,
block=block)
output: anno_dir / "vep_deepripe.parquet"
block=block,
),
output:
anno_dir / "vep_deepripe.parquet",
shell:
" ".join([
"python",
str(annotation_python_file),
"concat-annotations",
"{input.pvcf}",
"{input.anno_dir}",
f"{str(source_variant_file_pattern + '_merged.parquet').format(chr='{{chr}}', block='{{block}}')}",
"{output}",
f" --included-chromosomes {','.join(included_chromosomes)}"
])
" ".join(
[
"python",
str(annotation_python_file),
"concat-annotations",
"{input.pvcf}",
"{input.anno_dir}",
f"{str(source_variant_file_pattern + '_merged.parquet').format(chr='{{chr}}', block='{{block}}')}",
"{output}",
f" --included-chromosomes {','.join(included_chromosomes)}",
]
)


rule merge_annotations:
input:
vep = anno_dir / (source_variant_file_pattern + "_vep_anno.tsv"),
deepripe_parclip = anno_dir / (source_variant_file_pattern + "_variants.parclip_deepripe.csv.gz"),
deepripe_k5 = anno_dir / (source_variant_file_pattern + "_variants.eclip_k5_deepripe.csv.gz"),
deepripe_hg2 = anno_dir / (source_variant_file_pattern + "_variants.eclip_hg2_deepripe.csv.gz"),
variant_file = variant_file


vep=anno_dir / (source_variant_file_pattern + "_vep_anno.tsv"),
deepripe_parclip=anno_dir
/ (source_variant_file_pattern + "_variants.parclip_deepripe.csv.gz"),
deepripe_k5=anno_dir
/ (source_variant_file_pattern + "_variants.eclip_k5_deepripe.csv.gz"),
deepripe_hg2=anno_dir
/ (source_variant_file_pattern + "_variants.eclip_hg2_deepripe.csv.gz"),
variant_file=variant_file,
output:
anno_dir / f"{source_variant_file_pattern}_merged.parquet",
shell: "HEADER=$(grep -n '#Uploaded_variation' "+"{input.vep}" +"| head | cut -f 1 -d ':') && python "+f"{annotation_python_file} "+"merge-annotations $(($HEADER-1)) {input.vep} {input.deepripe_parclip} {input.deepripe_hg2} {input.deepripe_k5} {input.variant_file} {output}"
shell:
(
"HEADER=$(grep -n '#Uploaded_variation' "
+ "{input.vep}"
+ "| head | cut -f 1 -d ':') && python "
+ f"{annotation_python_file} "
+ "merge-annotations $(($HEADER-1)) {input.vep} {input.deepripe_parclip} {input.deepripe_hg2} {input.deepripe_k5} {input.variant_file} {output}"
)


rule mv_absplice_files:
input:
@@ -223,7 +245,6 @@ rule absplice:
block=block,
),
config=absplice_repo_dir / "example" / "workflow" / "mv_config.done",

output:
expand(
[
@@ -242,7 +263,10 @@
),
threads: n_cores_absplice
shell:
f"""python -m snakemake -s {str(absplice_repo_dir/"example"/"workflow"/"Snakefile")} -j 1 --use-conda --rerun-incomplete --directory {str(absplice_repo_dir /"example"/"workflow")} -c"""+"{threads}"
(
f"""python -m snakemake -s {str(absplice_repo_dir/"example"/"workflow"/"Snakefile")} -j 1 --use-conda --rerun-incomplete --directory {str(absplice_repo_dir /"example"/"workflow")} -c"""
+ "{threads}"
)


rule mod_config_absplice:
@@ -251,6 +275,7 @@ rule mod_config_absplice:
shell:
f""" rm {absplice_repo_dir}/example/workflow/config.yaml && cp {deeprvat_parent_path}/pipelines/resources/absplice_config.yaml {absplice_repo_dir}/example/workflow/config.yaml && touch {absplice_repo_dir}/example/workflow/mv_config.done"""


rule link_files_absplice:
input:
anno_tmp_dir / (source_variant_file_pattern + "_variants_header.vcf.gz"),
@@ -262,13 +287,11 @@ rule link_files_absplice:
f"mkdir -p {absplice_repo_dir/'example/data/resources/analysis_files/input_files'} && ln -s -r {{input}} {{output}}"



rule deepSea_PCA:
input:
anno_dir / "all_variants.wID.deepSea.csv",
output:
anno_dir / "deepSea_pca" / "deepsea_pca.parquet",

shell:
" ".join(
[
@@ -280,7 +303,7 @@ rule deepSea_PCA:
"deepsea-pca",
f"--n-components {config['deepsea_pca_n_components']}",
"{input}",
str(anno_dir / "deepSea_pca"/ "pca_matrix"),
str(anno_dir / "deepSea_pca" / "pca_matrix"),
str(anno_dir / "deepSea_pca"),
]
)
@@ -291,9 +314,8 @@ rule add_ids_deepSea:
variant_file=variant_file,
annotation_file=anno_dir / "all_variants.deepSea.csv",
output:
anno_dir / "all_variants.wID.deepSea.csv"
anno_dir / "all_variants.wID.deepSea.csv",
threads: ncores_addis

shell:
" ".join(
[
@@ -312,15 +334,15 @@ rule concat_deepSea:
input:
expand(
[
anno_dir / (source_variant_file_pattern + ".CLI.deepseapredict.diff.tsv"),
anno_dir
/ (source_variant_file_pattern + ".CLI.deepseapredict.diff.tsv"),
],
zip,
chr=chromosomes,
block=block,
),
output:
anno_dir / "all_variants.deepSea.csv",

shell:
" ".join(
[
@@ -331,9 +353,9 @@ rule concat_deepSea:
",".join(included_chromosomes),
"--sep '\t'",
f"{anno_dir}",
str(source_variant_file_pattern + ".CLI.deepseapredict.diff.tsv").format(
chr="{{chr}}", block="{{block}}"
),
str(
source_variant_file_pattern + ".CLI.deepseapredict.diff.tsv"
).format(chr="{{chr}}", block="{{block}}"),
str(metadata_dir / config["pvcf_blocks_file"]),
str(
anno_dir / "all_variants.deepSea.csv",
@@ -344,7 +366,8 @@

rule deepSea:
input:
variants=anno_tmp_dir / (source_variant_file_pattern + "_variants_header.vcf.gz"),
variants=anno_tmp_dir
/ (source_variant_file_pattern + "_variants_header.vcf.gz"),
fasta=fasta_dir / fasta_file_name,
output:
anno_dir / (source_variant_file_pattern + ".CLI.deepseapredict.diff.tsv"),
Expand All @@ -354,15 +377,12 @@ rule deepSea:
"kipoi_veff2_predict {input.variants} {input.fasta} {output} -l 1000 -m 'DeepSEA/predict' -s 'diff'"




rule deepRiPe_parclip:
input:
variants=anno_tmp_dir / (source_variant_file_pattern + "_variants.vcf"),
fasta=fasta_dir / fasta_file_name,
output:
anno_dir / (source_variant_file_pattern + "_variants.parclip_deepripe.csv.gz"),

shell:
f"mkdir -p {pybedtools_tmp_path/'parclip'} && python {annotation_python_file} scorevariants-deepripe {{input.variants}} {anno_dir} {{input.fasta}} {pybedtools_tmp_path/'parclip'} {saved_deepripe_models_path} {{threads}} 'parclip'"

@@ -384,21 +404,18 @@ rule deepRiPe_eclip_k5:
fasta=fasta_dir / fasta_file_name,
output:
anno_dir / (source_variant_file_pattern + "_variants.eclip_k5_deepripe.csv.gz"),

threads: lambda wildcards, attempt: n_jobs_deepripe * attempt
shell:
f"mkdir -p {pybedtools_tmp_path/'k5'} && python {annotation_python_file} scorevariants-deepripe {{input.variants}} {anno_dir} {{input.fasta}} {pybedtools_tmp_path/'k5'} {saved_deepripe_models_path} {{threads}} 'eclip_k5'"



rule vep:
input:
vcf=anno_tmp_dir / (source_variant_file_pattern + "_stripped.vcf.gz"),
fasta=fasta_dir / fasta_file_name,
output:
anno_dir / (source_variant_file_pattern + "_vep_anno.tsv"),
threads: vep_nfork

shell:
" ".join(
[
@@ -442,13 +459,15 @@ rule vep:
f"--plugin CADD,{cadd_snv_file},{cadd_indel_file}",
f"--plugin SpliceAI,snv={spliceAI_snv_file},indel={spliceAI_indel_file}",
f"--plugin PrimateAI,{primateAIfile}",
f"--plugin Condel,{condel_config_path},s,2"
f"--plugin Condel,{condel_config_path},s,2",
]
)


rule extract_with_header:
input:
source_variant_dir / (source_variant_file_pattern + f".{config['source_variant_file_type']}"),
source_variant_dir
/ (source_variant_file_pattern + f".{config['source_variant_file_type']}"),
output:
anno_tmp_dir / (source_variant_file_pattern + "_variants_header.vcf.gz"),
shell:
@@ -473,7 +492,8 @@ rule strip_chr_name:

rule extract_variants:
input:
source_variant_dir / (source_variant_file_pattern + f".{config['source_variant_file_type']}"),
source_variant_dir
/ (source_variant_file_pattern + f".{config['source_variant_file_type']}"),
output:
anno_tmp_dir / (source_variant_file_pattern + "_variants.vcf"),
shell:
