diff --git a/pipelines/preprocess_no_qc.snakefile b/pipelines/preprocess_no_qc.snakefile index 797f39db..a98c60d6 100644 --- a/pipelines/preprocess_no_qc.snakefile +++ b/pipelines/preprocess_no_qc.snakefile @@ -1,11 +1,13 @@ include: "preprocessing/preprocess.snakefile" + rule all: input: preprocessed_dir / "genotypes.h5", norm_variants_dir / "variants.tsv.gz", variants=norm_variants_dir / "variants.parquet", + rule preprocess_no_qc: input: variants=norm_variants_dir / "variants.tsv.gz", diff --git a/pipelines/preprocessing/preprocess.snakefile b/pipelines/preprocessing/preprocess.snakefile index 9da3a856..87320690 100644 --- a/pipelines/preprocessing/preprocess.snakefile +++ b/pipelines/preprocessing/preprocess.snakefile @@ -1,7 +1,9 @@ from pathlib import Path + configfile: "config/deeprvat_preprocess_config.yaml" + load_samtools = config.get("samtools_load_cmd") or "" load_bcftools = config.get("bcftools_load_cmd") or "" zcat_cmd = config.get("zcat_cmd") or "zcat" @@ -37,9 +39,8 @@ qc_filtered_samples_dir = qc_dir / "filtered_samples" with open(config["vcf_files_list"]) as file: - vcf_files = [Path(line.rstrip()) for line in file] - vcf_stems = [vf.stem.replace(".vcf","") for vf in vcf_files] + vcf_stems = [vf.stem.replace(".vcf", "") for vf in vcf_files] assert len(vcf_stems) == len(vcf_files)