Skip to content

Commit

Permalink
rename bcf_file_pattern to source_variant_file_pattern
Browse files (browse the repository at this point in the history)
  • Loading branch information
endast committed Oct 19, 2023
1 parent d93b920 commit ee0f3bc
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 34 deletions.
66 changes: 33 additions & 33 deletions pipelines/annotations.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ load_vep = " ".join([config["vep_load_cmd"], "&&" if config["vep_load_cmd"] else


# init data path
vcf_pattern = config["vcf_file_pattern"]
source_variant_file_pattern = config["source_variant_file_pattern"]
bcf_dir = Path(config["bcf_dir"])
anno_tmp_dir = Path(config["anno_tmp_dir"])
anno_dir = Path(config["anno_dir"])
Expand Down Expand Up @@ -103,7 +103,7 @@ rule all:
rule aggregate_and_merge_absplice:
input:
abscore_files=expand(
[anno_tmp_dir / "absplice" / (vcf_pattern + "_AbSplice_DNA.csv")],
[anno_tmp_dir / "absplice" / (source_variant_file_pattern + "_AbSplice_DNA.csv")],
zip,
chr=chromosomes,
block=block,
Expand Down Expand Up @@ -152,7 +152,7 @@ rule concat_annotations:
pvcf = metadata_dir / config['pvcf_blocks_file'],
anno_dir = anno_dir,
vcf_files=
expand([anno_dir / f"{vcf_pattern}_merged.parquet"],
expand([anno_dir / f"{source_variant_file_pattern}_merged.parquet"],
zip,
chr=chromosomes,
block=block)
Expand All @@ -164,22 +164,22 @@ rule concat_annotations:
"concat-annotations",
"{input.pvcf}",
"{input.anno_dir}",
f"{str(vcf_pattern+'_merged.parquet').format(chr='{{chr}}', block='{{block}}')}",
f"{str(source_variant_file_pattern + '_merged.parquet').format(chr='{{chr}}', block='{{block}}')}",
"{output}",
f" --included-chromosomes {','.join(included_chromosomes)}"
])

rule merge_annotations:
input:
vep = anno_dir / (vcf_pattern + "_vep_anno.tsv"),
deepripe_parclip = anno_dir / (vcf_pattern + "_variants.parclip_deepripe.csv.gz"),
deepripe_k5 = anno_dir / (vcf_pattern + "_variants.eclip_k5_deepripe.csv.gz"),
deepripe_hg2 = anno_dir / (vcf_pattern + "_variants.eclip_hg2_deepripe.csv.gz"),
vep = anno_dir / (source_variant_file_pattern + "_vep_anno.tsv"),
deepripe_parclip = anno_dir / (source_variant_file_pattern + "_variants.parclip_deepripe.csv.gz"),
deepripe_k5 = anno_dir / (source_variant_file_pattern + "_variants.eclip_k5_deepripe.csv.gz"),
deepripe_hg2 = anno_dir / (source_variant_file_pattern + "_variants.eclip_hg2_deepripe.csv.gz"),
variant_file = variant_file


output:
anno_dir / f"{vcf_pattern}_merged.parquet",
anno_dir / f"{source_variant_file_pattern}_merged.parquet",
shell: "HEADER=$(grep -n '#Uploaded_variation' "+"{input.vep}" +"| head | cut -f 1 -d ':') && python "+f"{annotation_python_file} "+"merge-annotations $(($HEADER-1)) {input.vep} {input.deepripe_parclip} {input.deepripe_hg2} {input.deepripe_k5} {input.variant_file} {output}"

rule mv_absplice_files:
Expand All @@ -190,10 +190,10 @@ rule mv_absplice_files:
/ "data"
/ "results"
/ "hg38"
/ (vcf_pattern + "_AbSplice_DNA.csv")
/ (source_variant_file_pattern + "_AbSplice_DNA.csv")
),
output:
anno_tmp_dir / "absplice" / (vcf_pattern + "_AbSplice_DNA.csv"),
anno_tmp_dir / "absplice" / (source_variant_file_pattern + "_AbSplice_DNA.csv"),
shell:
" ".join(
[
Expand All @@ -216,7 +216,7 @@ rule absplice:
[
absplice_repo_dir
/ "example/data/resources/analysis_files/input_files"
/ (vcf_pattern + "_variants_header.vcf"),
/ (source_variant_file_pattern + "_variants_header.vcf"),
],
zip,
chr=chromosomes,
Expand All @@ -233,7 +233,7 @@ rule absplice:
/ "data"
/ "results"
/ "hg38"
/ (vcf_pattern + "_AbSplice_DNA.csv")
/ (source_variant_file_pattern + "_AbSplice_DNA.csv")
),
],
zip,
Expand All @@ -253,11 +253,11 @@ rule mod_config_absplice:

rule link_files_absplice:
input:
anno_tmp_dir / (vcf_pattern + "_variants_header.vcf"),
anno_tmp_dir / (source_variant_file_pattern + "_variants_header.vcf"),
output:
absplice_repo_dir
/ "example/data/resources/analysis_files/input_files"
/ (vcf_pattern + "_variants_header.vcf"),
/ (source_variant_file_pattern + "_variants_header.vcf"),
shell:
f"mkdir -p {absplice_repo_dir/'example/data/resources/analysis_files/input_files'} && ln -s -r {{input}} {{output}}"

Expand Down Expand Up @@ -312,7 +312,7 @@ rule concat_deepSea:
input:
expand(
[
anno_dir / (vcf_pattern + ".CLI.deepseapredict.diff.tsv"),
anno_dir / (source_variant_file_pattern + ".CLI.deepseapredict.diff.tsv"),
],
zip,
chr=chromosomes,
Expand All @@ -331,7 +331,7 @@ rule concat_deepSea:
",".join(included_chromosomes),
"--sep '\t'",
f"{anno_dir}",
str(vcf_pattern + ".CLI.deepseapredict.diff.tsv").format(
str(source_variant_file_pattern + ".CLI.deepseapredict.diff.tsv").format(
chr="{{chr}}", block="{{block}}"
),
str(metadata_dir / config["pvcf_blocks_file"]),
Expand All @@ -344,10 +344,10 @@ rule concat_deepSea:

rule deepSea:
input:
variants=anno_tmp_dir / (vcf_pattern + "_variants_header.vcf"),
variants=anno_tmp_dir / (source_variant_file_pattern + "_variants_header.vcf"),
fasta=fasta_dir / fasta_file_name,
output:
anno_dir / (vcf_pattern + ".CLI.deepseapredict.diff.tsv"),
anno_dir / (source_variant_file_pattern + ".CLI.deepseapredict.diff.tsv"),
conda:
"kipoi-veff2"
shell:
Expand All @@ -358,32 +358,32 @@ rule deepSea:

rule deepRiPe_parclip:
input:
variants=anno_tmp_dir / (vcf_pattern + "_variants.vcf"),
variants=anno_tmp_dir / (source_variant_file_pattern + "_variants.vcf"),
fasta=fasta_dir / fasta_file_name,
output:
anno_dir / (vcf_pattern + "_variants.parclip_deepripe.csv.gz"),
anno_dir / (source_variant_file_pattern + "_variants.parclip_deepripe.csv.gz"),

shell:
f"mkdir -p {pybedtools_tmp_path/'parclip'} && python {annotation_python_file} scorevariants-deepripe {{input.variants}} {anno_dir} {{input.fasta}} {pybedtools_tmp_path/'parclip'} {saved_deepripe_models_path} {{threads}} 'parclip'"


rule deepRiPe_eclip_hg2:
input:
variants=anno_tmp_dir / (vcf_pattern + "_variants.vcf"),
variants=anno_tmp_dir / (source_variant_file_pattern + "_variants.vcf"),
fasta=fasta_dir / fasta_file_name,
output:
anno_dir / (vcf_pattern + "_variants.eclip_hg2_deepripe.csv.gz"),
anno_dir / (source_variant_file_pattern + "_variants.eclip_hg2_deepripe.csv.gz"),
threads: lambda wildcards, attempt: n_jobs_deepripe * attempt
shell:
f"mkdir -p {pybedtools_tmp_path/'hg2'} && python {annotation_python_file} scorevariants-deepripe {{input.variants}} {anno_dir} {{input.fasta}} {pybedtools_tmp_path/'hg2'} {saved_deepripe_models_path} {{threads}} 'eclip_hg2'"


rule deepRiPe_eclip_k5:
input:
variants=anno_tmp_dir / (vcf_pattern + "_variants.vcf"),
variants=anno_tmp_dir / (source_variant_file_pattern + "_variants.vcf"),
fasta=fasta_dir / fasta_file_name,
output:
anno_dir / (vcf_pattern + "_variants.eclip_k5_deepripe.csv.gz"),
anno_dir / (source_variant_file_pattern + "_variants.eclip_k5_deepripe.csv.gz"),

threads: lambda wildcards, attempt: n_jobs_deepripe * attempt
shell:
Expand All @@ -393,10 +393,10 @@ rule deepRiPe_eclip_k5:

rule vep:
input:
vcf=anno_tmp_dir / (vcf_pattern + "_stripped.vcf.gz"),
vcf=anno_tmp_dir / (source_variant_file_pattern + "_stripped.vcf.gz"),
fasta=fasta_dir / fasta_file_name,
output:
anno_dir / (vcf_pattern + "_vep_anno.tsv"),
anno_dir / (source_variant_file_pattern + "_vep_anno.tsv"),
threads: vep_nfork

shell:
Expand Down Expand Up @@ -448,9 +448,9 @@ rule vep:

rule extract_with_header:
input:
bcf_dir / (vcf_pattern + ".bcf"),
bcf_dir / (source_variant_file_pattern + ".bcf"),
output:
anno_tmp_dir / (vcf_pattern + "_variants_header.vcf"),
anno_tmp_dir / (source_variant_file_pattern + "_variants_header.vcf"),
shell:
(
load_bfc
Expand All @@ -464,18 +464,18 @@ rule extract_with_header:

rule strip_chr_name:
input:
anno_tmp_dir / (vcf_pattern + "_variants.vcf"),
anno_tmp_dir / (source_variant_file_pattern + "_variants.vcf"),
output:
anno_tmp_dir / (vcf_pattern + "_stripped.vcf.gz"),
anno_tmp_dir / (source_variant_file_pattern + "_stripped.vcf.gz"),
shell:
f"{load_hts} cut -c 4- {{input}} |bgzip > {{output}}"


rule extract_variants:
input:
bcf_dir / (vcf_pattern + ".bcf"),
bcf_dir / (source_variant_file_pattern + ".bcf"),
output:
anno_tmp_dir / (vcf_pattern + "_variants.vcf"),
anno_tmp_dir / (source_variant_file_pattern + "_variants.vcf"),
shell:
" ".join(
[
Expand Down
2 changes: 1 addition & 1 deletion pipelines/config/deeprvat_annotation_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ htslib_load_cmd : module load htslib/1.9
perl_load_cmd : module load perl/5.20.2
vep_load_cmd : module load vep/108.1

bcf_file_pattern : ukb23156_c{chr}_b{block}_v1
source_variant_file_pattern : ukb23156_c{chr}_b{block}_v1
included_chromosomes : ['21','22']

metadata_dir : input_dir/vcf/metadata
Expand Down

0 comments on commit ee0f3bc

Please sign in to comment.