Review note (this text sits ahead of the first `diff --git` header and is outside every hunk): the example config now defines `kipoiveff_repo_dir`, the key that pipelines/annotations.snakefile indexes directly, instead of the unread `kipoi_repo_dir`; and `ncores_addis` prefers a dedicated `n_cores_addis` setting, falling back to `n_jobs_deepripe` and then 32 so existing configs behave as before. Hunk line counts are unchanged.
diff --git a/deeprvat_annotations.yml b/deeprvat_annotations.yml index e1834e9b..c7b5e1bc 100644 --- a/deeprvat_annotations.yml +++ b/deeprvat_annotations.yml @@ -15,6 +15,7 @@ dependencies: - numpy=1.21.2 - tensorflow=2.11.0 - pyarrow=11.0.0 + - fastparquet=2023.4.0 #comment out lines below if you want to use preinstalled bcftools or samtools - bcftools=1.17 - samtools=1.17 \ No newline at end of file diff --git a/pipelines/annotations.snakefile b/pipelines/annotations.snakefile index 2f14204a..b8f7b345 100644 --- a/pipelines/annotations.snakefile +++ b/pipelines/annotations.snakefile @@ -32,7 +32,7 @@ load_perl = " ".join([config["perl_load_cmd"], "&&"]) load_vep = " ".join([config["vep_load_cmd"], "&&"]) # init data path -vcf_pattern = config["vcf_file_pattern"] +vcf_pattern = config["bcf_file_pattern"] bcf_dir = Path(config["bcf_dir"]) anno_tmp_dir = Path(config["anno_tmp_dir"]) anno_dir = Path(config["anno_dir"]) @@ -65,7 +65,14 @@ pvcf_blocks_df = pd.read_csv( dtype={"Chromosome": str}, ).set_index("Index") # init absplice +absplice_repo_dir = Path(config["absplice_repo_dir"]) n_cores_absplice = int(config.get("n_cores_absplice") or 4) +ncores_merge_absplice = int(config.get("n_cores_merge_absplice") or 64) +#init deepripe +n_jobs_deepripe = int(config.get("n_jobs_deepripe") or 8) +# init kipoi-veff2 +kipoi_repo_dir = Path(config["kipoiveff_repo_dir"]) +ncores_addis = int(config.get("n_cores_addis") or config.get("n_jobs_deepripe") or 32) # Filter out which chromosomes to work with pvcf_blocks_df = pvcf_blocks_df[ pvcf_blocks_df["Chromosome"].isin([str(c) for c in included_chromosomes]) @@ -575,10 +582,11 @@ rule deepRiPe_parclip: setup=repo_dir / "annotation-workflow-setup.done", output: anno_dir / (vcf_pattern + "_variants.parclip_deepripe.csv"), - resources: - mem_mb=100000 + + threads: lambda wildcards, attempt: n_jobs_deepripe * attempt + shell: - f"mkdir -p {pybedtools_tmp_path/'parclip'} && python {annotation_python_file} scorevariants-deepripe {{input.variants}} {anno_dir} 
{{input.fasta}} {pybedtools_tmp_path/'parclip'} {saved_deepripe_models_path} 'parclip'" + f"mkdir -p {pybedtools_tmp_path/'parclip'} && python {annotation_python_file} scorevariants-deepripe {{input.variants}} {anno_dir} {{input.fasta}} {pybedtools_tmp_path/'parclip'} {saved_deepripe_models_path} {{threads}} 'parclip'" @@ -589,10 +597,10 @@ rule deepRiPe_eclip_hg2: setup=repo_dir / "annotation-workflow-setup.done", output: anno_dir / (vcf_pattern + "_variants.eclip_hg2_deepripe.csv"), - resources: - mem_mb=100000 + threads: lambda wildcards, attempt: n_jobs_deepripe * attempt + shell: - f"mkdir -p {pybedtools_tmp_path/'hg2'} && python {annotation_python_file} scorevariants-deepripe {{input.variants}} {anno_dir} {{input.fasta}} {pybedtools_tmp_path/'hg2'} {saved_deepripe_models_path} 'eclip_hg2'" + f"mkdir -p {pybedtools_tmp_path/'hg2'} && python {annotation_python_file} scorevariants-deepripe {{input.variants}} {anno_dir} {{input.fasta}} {pybedtools_tmp_path/'hg2'} {saved_deepripe_models_path} {{threads}} 'eclip_hg2'" rule deepRiPe_eclip_k5: @@ -602,10 +610,10 @@ rule deepRiPe_eclip_k5: setup=repo_dir / "annotation-workflow-setup.done", output: anno_dir / (vcf_pattern + "_variants.eclip_k5_deepripe.csv"), - resources: - mem_mb=100000 + threads: lambda wildcards, attempt: n_jobs_deepripe * attempt + shell: - f"mkdir -p {pybedtools_tmp_path/'k5'} && python {annotation_python_file} scorevariants-deepripe {{input.variants}} {anno_dir} {{input.fasta}} {pybedtools_tmp_path/'k5'} {saved_deepripe_models_path} 'eclip_k5'" + f"mkdir -p {pybedtools_tmp_path/'k5'} && python {annotation_python_file} scorevariants-deepripe {{input.variants}} {anno_dir} {{input.fasta}} {pybedtools_tmp_path/'k5'} {saved_deepripe_models_path} {{threads}} 'eclip_k5'" rule all_vep: diff --git a/pipelines/config/deeprvat_annotation_config.yaml b/pipelines/config/deeprvat_annotation_config.yaml index b420881d..35aec1c7 100644 --- a/pipelines/config/deeprvat_annotation_config.yaml +++ 
b/pipelines/config/deeprvat_annotation_config.yaml @@ -5,12 +5,12 @@ htslib_load_cmd : module load htslib/1.9 perl_load_cmd : module load perl/5.20.2 vep_load_cmd : module load vep/108.1 -vcf_file_pattern : ukb23156_c{chr}_b{block}_v1 +bcf_file_pattern : ukb23156_c{chr}_b{block}_v1 included_chromosomes : ['21','22'] metadata_dir : input_dir/vcf/metadata pvcf_blocks_file : pvcf_blocks.txt -vcf_dir : input_dir/vcf +bcf_dir : input_dir/bcf anno_tmp_dir : output_dir/annotations/tmp anno_dir : output_dir/annotations @@ -23,10 +23,11 @@ spliceAI_indel_file : annotation_data/spliceAI/spliceai_scores.raw.indel.hg38.vc primateAI_file : annotation_data/primateAI/PrimateAI_scores_v0.2_GRCh38_sorted.tsv.bgz cadd_snv_file : annotation_data/cadd/whole_genome_SNVs.tsv.gz cadd_indel_file : annotation_data/cadd/gnomad.genomes.r3.0.indel.tsv.gz - +absplice_repo_dir : repo_dir/absplice deeprvat_repo_dir : deeprvat_repo_dir +kipoiveff_repo_dir : repo_dir/kipoi-veff2 variant_file_path : preprocessing_workdir/norm/variants/variants.tsv.gz pybedtools_tmp_path : output_dir/annotations/tmp/pybedtools - - - +n_jobs_deepripe : 32 +n_cores_merge_absplice : 32 +n_cores_absplice : 32