Skip to content

Commit

Permalink
Add resources back to preprocessing pipeline
Browse files: browse the repository at this point in the history
  • Loading branch information
endast committed Apr 18, 2024
1 parent 086bb6c commit 40c07a6
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 0 deletions.
12 changes: 12 additions & 0 deletions pipelines/preprocessing/preprocess.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ rule normalize:
vcf_file=lambda wildcards: vcf_look_up[wildcards.vcf_stem],
output:
bcf_file=bcf_dir / "{vcf_stem}.bcf",
resources:
mem_mb=lambda wildcards, attempt: 16384 * (attempt + 1),
shell:
f"""{load_bcftools} bcftools view --samples-file {{input.samplefile}} --output-type u {{params.vcf_file}} | bcftools view --include 'COUNT(GT="alt") > 0' --output-type u | bcftools norm -m-both -f {{input.fasta}} --output-type b --output {{output.bcf_file}}"""

Expand All @@ -78,6 +80,8 @@ rule sparsify:
bcf=bcf_dir / "{vcf_stem}.bcf",
output:
tsv=sparse_dir / "{vcf_stem}.tsv.gz",
resources:
mem_mb=512,
shell:
f"""{load_bcftools} bcftools query --format '[%CHROM\t%POS\t%REF\t%ALT\t%SAMPLE\t%GT\n]' --include 'GT!="RR" & GT!="mis"' {{input.bcf}} \
| sed 's/0[/,|]1/1/; s/1[/,|]0/1/; s/1[/,|]1/2/; s/0[/,|]0/0/' | gzip > {{output.tsv}}"""
Expand All @@ -88,6 +92,8 @@ rule variants:
bcf=bcf_dir / "{vcf_stem}.bcf",
output:
norm_variants_dir / "{vcf_stem}.tsv.gz",
resources:
mem_mb=512,
shell:
f"{load_bcftools} bcftools query --format '%CHROM\t%POS\t%REF\t%ALT\n' {{input}} | gzip > {{output}}"

Expand All @@ -97,6 +103,8 @@ rule concatenate_variants:
expand(norm_variants_dir / "{vcf_stem}.tsv.gz",vcf_stem=vcf_stems),
output:
norm_variants_dir / "variants_no_id.tsv.gz",
resources:
mem_mb=256,
shell:
"{zcat_cmd} {input} | gzip > {output}"

Expand All @@ -107,6 +115,8 @@ rule add_variant_ids:
output:
variants=norm_variants_dir / "variants.tsv.gz",
duplicates=qc_duplicate_vars_dir / "duplicates.tsv",
resources:
mem_mb=2048,
shell:
f"{preprocessing_cmd} add-variant-ids {{input}} {{output.variants}} {{output.duplicates}}"

Expand All @@ -117,6 +127,8 @@ rule create_parquet_variant_ids:
output:
variants=norm_variants_dir / "variants.parquet",
duplicates=qc_duplicate_vars_dir / "duplicates.parquet",
resources:
mem_mb=2048,
shell:
f"{preprocessing_cmd} add-variant-ids {{input}} {{output.variants}} {{output.duplicates}}"

Expand Down
8 changes: 8 additions & 0 deletions pipelines/preprocessing/qc.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ rule qc_allelic_imbalance:
bcf_dir / "{vcf_stem}.bcf",
output:
qc_allelic_imbalance_dir / "{vcf_stem}.tsv.gz",
resources:
mem_mb=lambda wildcards, attempt: 256 * attempt,
shell:
f"""{load_bcftools} bcftools query --format '%CHROM\t%POS\t%REF\t%ALT\n' --exclude 'COUNT(GT="het")=0 || (GT="het" & ((TYPE="snp" & (FORMAT/AD[*:1] / FORMAT/AD[*:0]) > 0.15) | (TYPE="indel" & (FORMAT/AD[*:1] / FORMAT/AD[*:0]) > 0.20)))' {{input}} | gzip > {{output}}"""

Expand All @@ -14,6 +16,8 @@ rule qc_varmiss:
bcf_dir / "{vcf_stem}.bcf",
output:
qc_varmiss_dir / "{vcf_stem}.tsv.gz",
resources:
mem_mb=lambda wildcards, attempt: 256 * attempt,
shell:
f'{load_bcftools} bcftools query --format "%CHROM\t%POS\t%REF\t%ALT\n" --include "F_MISSING >= 0.1" {{input}} | gzip > {{output}}'

Expand All @@ -23,6 +27,8 @@ rule qc_hwe:
bcf_dir / "{vcf_stem}.bcf",
output:
qc_hwe_dir / "{vcf_stem}.tsv.gz",
resources:
mem_mb=lambda wildcards, attempt: 256 * (attempt + 1),
shell:
f'{load_bcftools} bcftools +fill-tags --output-type u {{input}} -- --tags HWE | bcftools query --format "%CHROM\t%POS\t%REF\t%ALT\n" --include "INFO/HWE <= 1e-15" | gzip > {{output}}'

Expand All @@ -32,6 +38,8 @@ rule qc_read_depth:
bcf_dir / "{vcf_stem}.bcf",
output:
qc_read_depth_dir / "{vcf_stem}.tsv.gz",
resources:
mem_mb=lambda wildcards, attempt: 256 * attempt,
shell:
f"""{load_bcftools} bcftools query --format '[%CHROM\\t%POS\\t%REF\\t%ALT\\t%SAMPLE\\n]' --include '(GT!="RR" & GT!="mis" & TYPE="snp" & FORMAT/DP < 7) | (GT!="RR" & GT!="mis" & TYPE="indel" & FORMAT/DP < 10)' {{input}} | gzip > {{output}}"""

Expand Down

0 comments on commit 40c07a6

Please sign in to comment.