Skip to content

Commit

Permalink
checkout preprocessing files from main
Browse files Browse the repository at this point in the history
  • Loading branch information
HolEv committed Feb 22, 2024
1 parent 3c62850 commit 5ac374e
Show file tree
Hide file tree
Showing 5 changed files with 263 additions and 305 deletions.
305 changes: 0 additions & 305 deletions pipelines/preprocess.snakefile

This file was deleted.

33 changes: 33 additions & 0 deletions pipelines/preprocess_no_qc.snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
include: "preprocessing/preprocess.snakefile"


rule all:
    # Default target of this pipeline: requests the genotype HDF5 file and
    # the normalized variant tables (TSV and Parquet).
    input:
        preprocessed_dir / "genotypes.h5",
        norm_variants_dir / "variants.tsv.gz",
        variants=norm_variants_dir / "variants.parquet",


rule preprocess_no_qc:
    # Runs `process-sparse-gt` on the sparse genotype tables and writes one
    # genotypes_chr<chr>.h5 file per entry of `chromosomes`. The only exclusion
    # passed to the command is the duplicate-variants directory.
    input:
        variants=norm_variants_dir / "variants.tsv.gz",
        variants_parquet=norm_variants_dir / "variants.parquet",
        samples=norm_dir / "samples_chr.csv",
        # Declared so the sparse tables exist before the command runs; the
        # command itself is handed `sparse_dir`, not the individual files.
        sparse_tg=expand(sparse_dir / "{vcf_stem}.tsv.gz", vcf_stem=vcf_stems),
    output:
        expand(preprocessed_dir / "genotypes_chr{chr}.h5", chr=chromosomes),
    shell:
        " ".join(
            [
                f"{preprocessing_cmd}",
                "process-sparse-gt",
                f"--exclude-variants {qc_duplicate_vars_dir}",
                "--chromosomes",
                # De-duplicate while preserving the configured order. Iterating
                # a set() has no guaranteed order, which made the generated
                # command text vary between runs.
                ",".join(str(c) for c in dict.fromkeys(chromosomes)),
                f"--threads {preprocess_threads}",
                "{input.variants}",
                "{input.samples}",
                f"{sparse_dir}",
                # Output prefix; presumably the tool appends "_chr<chr>.h5" to
                # match the declared outputs. TODO confirm against the CLI.
                f"{preprocessed_dir / 'genotypes'}",
            ]
        )
49 changes: 49 additions & 0 deletions pipelines/preprocess_with_qc.snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@

include: "preprocessing/preprocess.snakefile"
include: "preprocessing/qc.snakefile"


rule all:
    # Default target of this pipeline: requests the genotype HDF5 file and
    # the normalized variant tables (TSV and Parquet).
    input:
        preprocessed_dir / "genotypes.h5",
        norm_variants_dir / "variants.tsv.gz",
        variants=norm_variants_dir / "variants.parquet",


rule preprocess_with_qc:
    # Runs `process-sparse-gt` on the sparse genotype tables and writes one
    # genotypes_chr<chr>.h5 file per entry of `chromosomes`, passing the QC
    # result directories to the command as variant/call/sample exclusions.
    input:
        variants=norm_variants_dir / "variants.tsv.gz",
        variants_parquet=norm_variants_dir / "variants.parquet",
        samples=norm_dir / "samples_chr.csv",
        # The per-VCF files below are declared so they exist before the
        # command runs; the command itself is handed the containing
        # directories, not the individual files.
        sparse_tg=expand(sparse_dir / "{vcf_stem}.tsv.gz", vcf_stem=vcf_stems),
        qc_varmiss=expand(qc_varmiss_dir / "{vcf_stem}.tsv.gz", vcf_stem=vcf_stems),
        qc_hwe=expand(qc_hwe_dir / "{vcf_stem}.tsv.gz", vcf_stem=vcf_stems),
        qc_read_depth=expand(
            qc_read_depth_dir / "{vcf_stem}.tsv.gz", vcf_stem=vcf_stems
        ),
        qc_allelic_imbalance=expand(
            qc_allelic_imbalance_dir / "{vcf_stem}.tsv.gz", vcf_stem=vcf_stems
        ),
        qc_filtered_samples=qc_filtered_samples_dir,
    output:
        expand(preprocessed_dir / "genotypes_chr{chr}.h5", chr=chromosomes),
    shell:
        " ".join(
            [
                f"{preprocessing_cmd}",
                "process-sparse-gt",
                f"--exclude-variants {qc_allelic_imbalance_dir}",
                f"--exclude-variants {qc_hwe_dir}",
                f"--exclude-variants {qc_varmiss_dir}",
                # NOTE(review): the duplicate-variants directory is not listed
                # under `input`; presumably an included rule produces it.
                # Verify.
                f"--exclude-variants {qc_duplicate_vars_dir}",
                f"--exclude-calls {qc_read_depth_dir}",
                f"--exclude-samples {qc_filtered_samples_dir}",
                "--chromosomes",
                # De-duplicate while preserving the configured order. Iterating
                # a set() has no guaranteed order, which made the generated
                # command text vary between runs.
                ",".join(str(c) for c in dict.fromkeys(chromosomes)),
                f"--threads {preprocess_threads}",
                "{input.variants}",
                "{input.samples}",
                f"{sparse_dir}",
                # Output prefix; presumably the tool appends "_chr<chr>.h5" to
                # match the declared outputs. TODO confirm against the CLI.
                f"{preprocessed_dir / 'genotypes'}",
            ]
        )
Loading

0 comments on commit 5ac374e

Please sign in to comment.