Declare per-rule memory requests (resources: mem_mb) for the preprocessing and QC Snakemake rules.

Fixed requests: sparsify and variants 512; concatenate_variants 256; add_variant_ids and create_parquet_variant_ids 2048.
Attempt-scaled requests: normalize 16384 * (attempt + 1); qc_allelic_imbalance, qc_varmiss and qc_read_depth 256 * attempt; qc_hwe 256 * (attempt + 1).

NOTE(review): the attempt-based values only grow when failed jobs are re-run (Snakemake retries) -- confirm the execution profile enables that.
NOTE(review): normalize and qc_hwe use (attempt + 1) while the other QC rules use attempt; presumably a deliberately larger first-attempt request -- confirm.

diff --git a/pipelines/preprocessing/preprocess.snakefile b/pipelines/preprocessing/preprocess.snakefile
index 20eb0c89..cad20b3c 100644
--- a/pipelines/preprocessing/preprocess.snakefile
+++ b/pipelines/preprocessing/preprocess.snakefile
@@ -60,6 +60,8 @@ rule normalize:
         vcf_file=lambda wildcards: vcf_look_up[wildcards.vcf_stem],
     output:
         bcf_file=bcf_dir / "{vcf_stem}.bcf",
+    resources:
+        mem_mb=lambda wildcards, attempt: 16384 * (attempt + 1),
     shell:
         f"""{load_bcftools} bcftools view --samples-file {{input.samplefile}} --output-type u {{params.vcf_file}} | bcftools view --include 'COUNT(GT="alt") > 0' --output-type u | bcftools norm -m-both -f {{input.fasta}} --output-type b --output {{output.bcf_file}}"""
 
@@ -78,6 +80,8 @@ rule sparsify:
         bcf=bcf_dir / "{vcf_stem}.bcf",
     output:
         tsv=sparse_dir / "{vcf_stem}.tsv.gz",
+    resources:
+        mem_mb=512,
     shell:
         f"""{load_bcftools} bcftools query --format '[%CHROM\t%POS\t%REF\t%ALT\t%SAMPLE\t%GT\n]' --include 'GT!="RR" & GT!="mis"' {{input.bcf}} \
         | sed 's/0[/,|]1/1/; s/1[/,|]0/1/; s/1[/,|]1/2/; s/0[/,|]0/0/' | gzip > {{output.tsv}}"""
@@ -88,6 +92,8 @@ rule variants:
         bcf=bcf_dir / "{vcf_stem}.bcf",
     output:
         norm_variants_dir / "{vcf_stem}.tsv.gz",
+    resources:
+        mem_mb=512,
     shell:
         f"{load_bcftools} bcftools query --format '%CHROM\t%POS\t%REF\t%ALT\n' {{input}} | gzip > {{output}}"
 
@@ -97,6 +103,8 @@ rule concatenate_variants:
         expand(norm_variants_dir / "{vcf_stem}.tsv.gz",vcf_stem=vcf_stems),
     output:
         norm_variants_dir / "variants_no_id.tsv.gz",
+    resources:
+        mem_mb=256,
     shell:
         "{zcat_cmd} {input} | gzip > {output}"
 
@@ -107,6 +115,8 @@ rule add_variant_ids:
     output:
         variants=norm_variants_dir / "variants.tsv.gz",
         duplicates=qc_duplicate_vars_dir / "duplicates.tsv",
+    resources:
+        mem_mb=2048,
     shell:
         f"{preprocessing_cmd} add-variant-ids {{input}} {{output.variants}} {{output.duplicates}}"
 
@@ -117,6 +127,8 @@ rule create_parquet_variant_ids:
     output:
         variants=norm_variants_dir / "variants.parquet",
         duplicates=qc_duplicate_vars_dir / "duplicates.parquet",
+    resources:
+        mem_mb=2048,
     shell:
         f"{preprocessing_cmd} add-variant-ids {{input}} {{output.variants}} {{output.duplicates}}"
 
diff --git a/pipelines/preprocessing/qc.snakefile b/pipelines/preprocessing/qc.snakefile
index 8b499273..2369de45 100644
--- a/pipelines/preprocessing/qc.snakefile
+++ b/pipelines/preprocessing/qc.snakefile
@@ -5,6 +5,8 @@ rule qc_allelic_imbalance:
         bcf_dir / "{vcf_stem}.bcf",
     output:
         qc_allelic_imbalance_dir / "{vcf_stem}.tsv.gz",
+    resources:
+        mem_mb=lambda wildcards, attempt: 256 * attempt,
     shell:
         f"""{load_bcftools} bcftools query --format '%CHROM\t%POS\t%REF\t%ALT\n' --exclude 'COUNT(GT="het")=0 || (GT="het" & ((TYPE="snp" & (FORMAT/AD[*:1] / FORMAT/AD[*:0]) > 0.15) | (TYPE="indel" & (FORMAT/AD[*:1] / FORMAT/AD[*:0]) > 0.20)))' {{input}} | gzip > {{output}}"""
 
@@ -14,6 +16,8 @@ rule qc_varmiss:
         bcf_dir / "{vcf_stem}.bcf",
     output:
         qc_varmiss_dir / "{vcf_stem}.tsv.gz",
+    resources:
+        mem_mb=lambda wildcards, attempt: 256 * attempt,
     shell:
         f'{load_bcftools} bcftools query --format "%CHROM\t%POS\t%REF\t%ALT\n" --include "F_MISSING >= 0.1" {{input}} | gzip > {{output}}'
 
@@ -23,6 +27,8 @@ rule qc_hwe:
         bcf_dir / "{vcf_stem}.bcf",
     output:
         qc_hwe_dir / "{vcf_stem}.tsv.gz",
+    resources:
+        mem_mb=lambda wildcards, attempt: 256 * (attempt + 1),
     shell:
         f'{load_bcftools} bcftools +fill-tags --output-type u {{input}} -- --tags HWE | bcftools query --format "%CHROM\t%POS\t%REF\t%ALT\n" --include "INFO/HWE <= 1e-15" | gzip > {{output}}'
 
@@ -32,6 +38,8 @@ rule qc_read_depth:
         bcf_dir / "{vcf_stem}.bcf",
     output:
         qc_read_depth_dir / "{vcf_stem}.tsv.gz",
+    resources:
+        mem_mb=lambda wildcards, attempt: 256 * attempt,
     shell:
         f"""{load_bcftools} bcftools query --format '[%CHROM\\t%POS\\t%REF\\t%ALT\\t%SAMPLE\\n]' --include '(GT!="RR" & GT!="mis" & TYPE="snp" & FORMAT/DP < 7) | (GT!="RR" & GT!="mis" & TYPE="indel" & FORMAT/DP < 10)' {{input}} | gzip > {{output}}"""
 