Skip to content

Commit

Permalink
added multithreading for deepripe scores to snakemake file
Browse files Browse the repository at this point in the history
  • Loading branch information
“Marcel-Mueck” committed Oct 6, 2023
1 parent 87a33ba commit e91ca3d
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 16 deletions.
1 change: 1 addition & 0 deletions deeprvat_annotations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies:
- numpy=1.21.2
- tensorflow=2.11.0
- pyarrow=11.0.0
- fastparquet=2023.4.0
# Comment out the lines below if you want to use preinstalled bcftools or samtools
- bcftools=1.17
- samtools=1.17
28 changes: 18 additions & 10 deletions pipelines/annotations.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ load_perl = " ".join([config["perl_load_cmd"], "&&"])
load_vep = " ".join([config["vep_load_cmd"], "&&"])

# init data path
vcf_pattern = config["vcf_file_pattern"]
vcf_pattern = config["bcf_file_pattern"]
bcf_dir = Path(config["bcf_dir"])
anno_tmp_dir = Path(config["anno_tmp_dir"])
anno_dir = Path(config["anno_dir"])
Expand Down Expand Up @@ -65,7 +65,14 @@ pvcf_blocks_df = pd.read_csv(
dtype={"Chromosome": str},
).set_index("Index")
# init absplice
# `absplice_repo_dir` is read with [] and therefore has to be present in the
# config; the core counts are optional and fall back to the literal default
# when the key is missing or its value is falsy (None, 0, "").
absplice_repo_dir = Path(config["absplice_repo_dir"])
n_cores_absplice = int(config.get("n_cores_absplice") or 4)
ncores_merge_absplice = int(config.get("n_cores_merge_absplice") or 64)
# init deepripe
# Base thread count for the deepRiPe rules, which request
# `n_jobs_deepripe * attempt` threads; defaults to 8 when unset or falsy.
n_jobs_deepripe = int(config.get("n_jobs_deepripe") or 8)
# init kipoi-veff2
# NOTE(review): this reads the key "kipoiveff_repo_dir", while the config
# excerpt in this commit shows an entry named `kipoi_repo_dir` -- confirm the
# two agree, otherwise this lookup fails on a missing key.
kipoi_repo_dir = Path(config["kipoiveff_repo_dir"])
# NOTE(review): this reads "n_jobs_deepripe", the same key as
# `n_jobs_deepripe` above, so `ncores_addis` follows the deepripe setting and
# only the fallback (32 instead of 8) differs. Looks like a copy-paste slip --
# confirm which config key was intended.
ncores_addis = int(config.get("n_jobs_deepripe") or 32)
# Filter out which chromosomes to work with
pvcf_blocks_df = pvcf_blocks_df[
pvcf_blocks_df["Chromosome"].isin([str(c) for c in included_chromosomes])
Expand Down Expand Up @@ -575,10 +582,11 @@ rule deepRiPe_parclip:
setup=repo_dir / "annotation-workflow-setup.done",
output:
anno_dir / (vcf_pattern + "_variants.parclip_deepripe.csv"),
resources:
mem_mb=100000

threads: lambda wildcards, attempt: n_jobs_deepripe * attempt

shell:
f"mkdir -p {pybedtools_tmp_path/'parclip'} && python {annotation_python_file} scorevariants-deepripe {{input.variants}} {anno_dir} {{input.fasta}} {pybedtools_tmp_path/'parclip'} {saved_deepripe_models_path} 'parclip'"
f"mkdir -p {pybedtools_tmp_path/'parclip'} && python {annotation_python_file} scorevariants-deepripe {{input.variants}} {anno_dir} {{input.fasta}} {pybedtools_tmp_path/'parclip'} {saved_deepripe_models_path} {{threads}} 'parclip'"



Expand All @@ -589,10 +597,10 @@ rule deepRiPe_eclip_hg2:
setup=repo_dir / "annotation-workflow-setup.done",
output:
anno_dir / (vcf_pattern + "_variants.eclip_hg2_deepripe.csv"),
resources:
mem_mb=100000
threads: lambda wildcards, attempt: n_jobs_deepripe * attempt

shell:
f"mkdir -p {pybedtools_tmp_path/'hg2'} && python {annotation_python_file} scorevariants-deepripe {{input.variants}} {anno_dir} {{input.fasta}} {pybedtools_tmp_path/'hg2'} {saved_deepripe_models_path} 'eclip_hg2'"
f"mkdir -p {pybedtools_tmp_path/'hg2'} && python {annotation_python_file} scorevariants-deepripe {{input.variants}} {anno_dir} {{input.fasta}} {pybedtools_tmp_path/'hg2'} {saved_deepripe_models_path} {{threads}} 'eclip_hg2'"


rule deepRiPe_eclip_k5:
Expand All @@ -602,10 +610,10 @@ rule deepRiPe_eclip_k5:
setup=repo_dir / "annotation-workflow-setup.done",
output:
anno_dir / (vcf_pattern + "_variants.eclip_k5_deepripe.csv"),
resources:
mem_mb=100000
threads: lambda wildcards, attempt: n_jobs_deepripe * attempt

shell:
f"mkdir -p {pybedtools_tmp_path/'k5'} && python {annotation_python_file} scorevariants-deepripe {{input.variants}} {anno_dir} {{input.fasta}} {pybedtools_tmp_path/'k5'} {saved_deepripe_models_path} 'eclip_k5'"
f"mkdir -p {pybedtools_tmp_path/'k5'} && python {annotation_python_file} scorevariants-deepripe {{input.variants}} {anno_dir} {{input.fasta}} {pybedtools_tmp_path/'k5'} {saved_deepripe_models_path} {{threads}} 'eclip_k5'"


rule all_vep:
Expand Down
13 changes: 7 additions & 6 deletions pipelines/config/deeprvat_annotation_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ htslib_load_cmd : module load htslib/1.9
perl_load_cmd : module load perl/5.20.2
vep_load_cmd : module load vep/108.1

vcf_file_pattern : ukb23156_c{chr}_b{block}_v1
bcf_file_pattern : ukb23156_c{chr}_b{block}_v1
included_chromosomes : ['21','22']

metadata_dir : input_dir/vcf/metadata
pvcf_blocks_file : pvcf_blocks.txt
vcf_dir : input_dir/vcf
bcf_dir : input_dir/bcf
anno_tmp_dir : output_dir/annotations/tmp
anno_dir : output_dir/annotations

Expand All @@ -23,10 +23,11 @@ spliceAI_indel_file : annotation_data/spliceAI/spliceai_scores.raw.indel.hg38.vc
primateAI_file : annotation_data/primateAI/PrimateAI_scores_v0.2_GRCh38_sorted.tsv.bgz
cadd_snv_file : annotation_data/cadd/whole_genome_SNVs.tsv.gz
cadd_indel_file : annotation_data/cadd/gnomad.genomes.r3.0.indel.tsv.gz

absplice_repo_dir : repo_dir/absplice
deeprvat_repo_dir : deeprvat_repo_dir
kipoi_repo_dir : repo_dir/kipoi-veff2
variant_file_path : preprocessing_workdir/norm/variants/variants.tsv.gz
pybedtools_tmp_path : output_dir/annotations/tmp/pybedtools



n_jobs_deepripe : 32
n_cores_merge_absplice : 32
n_cores_absplice : 32

0 comments on commit e91ca3d

Please sign in to comment.