From 5b5c8b60202f10136527eb2f2268f57a10cdf270 Mon Sep 17 00:00:00 2001 From: Eva Holtkamp <59055511+HolEv@users.noreply.github.com> Date: Tue, 28 May 2024 16:00:52 +0200 Subject: [PATCH 1/3] fix missing variant_file (#104) * fix missing variant_file * fixup! Format Python code with psf/black pull_request * fix variant_file position in config --------- Co-authored-by: PMBio --- deeprvat/seed_gene_discovery/config.yaml | 2 +- deeprvat/seed_gene_discovery/seed_gene_discovery.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/deeprvat/seed_gene_discovery/config.yaml b/deeprvat/seed_gene_discovery/config.yaml index a90e0a2e..0057dab4 100644 --- a/deeprvat/seed_gene_discovery/config.yaml +++ b/deeprvat/seed_gene_discovery/config.yaml @@ -60,8 +60,8 @@ variant_file: variants.parquet data: gt_file: genotypes.h5 + variant_file: variants.parquet dataset_config: - variant_file: variants.parquet phenotype_file: phenotypes.parquet standardize_xpheno: False y_transformation: quantile_transform diff --git a/deeprvat/seed_gene_discovery/seed_gene_discovery.py b/deeprvat/seed_gene_discovery/seed_gene_discovery.py index 6a187a7e..364b09ff 100644 --- a/deeprvat/seed_gene_discovery/seed_gene_discovery.py +++ b/deeprvat/seed_gene_discovery/seed_gene_discovery.py @@ -537,13 +537,12 @@ def make_dataset_( dataset = pickle.load(f) else: logger.info("Instantiating dataset") - variant_file = data_config.get( - "variant_file", f'{data_config["gt_file"][:-3]}_variants.parquet' - ) + dataset = DenseGTDataset( gt_file=data_config["gt_file"], skip_y_na=True, skip_x_na=True, + variant_file=data_config["variant_file"], **data_config["dataset_config"], ) logger.info("Writing pickled data set") From db4837403fe88b260323c8058da5ac636bca0fea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20M=C3=BCck?= Date: Wed, 29 May 2024 13:33:22 +0200 Subject: [PATCH 2/3] changed config to use normalized bcfs as default input. (#105) --- pipelines/config/deeprvat_annotation_config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/config/deeprvat_annotation_config.yaml b/pipelines/config/deeprvat_annotation_config.yaml index ecf12f10..3608d45f 100644 --- a/pipelines/config/deeprvat_annotation_config.yaml +++ b/pipelines/config/deeprvat_annotation_config.yaml @@ -8,12 +8,12 @@ fasta_file_name : hg38.fa gtf_file_name : gencode.v44.annotation.gtf.gz source_variant_file_pattern : test_vcf_data_c{chr}_b{block} -source_variant_file_type: 'vcf.gz' +source_variant_file_type: 'bcf' # comment out / remove to run on all chromosomes included_chromosomes : ['21','22'] -source_variant_dir : input_dir/vcf +source_variant_dir : preprocessing_workdir/norm/bcf anno_tmp_dir : output_dir/annotations/tmp anno_dir : output_dir/annotations From 901ff9706d7e796c61ee85ec76f97696efce6ae3 Mon Sep 17 00:00:00 2001 From: Magnus Wahlberg Date: Wed, 29 May 2024 14:33:03 +0200 Subject: [PATCH 3/3] add -p flag (#106) --- .github/workflows/run-pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-pipeline.yml b/.github/workflows/run-pipeline.yml index 47f02e8b..b2e3d469 100644 --- a/.github/workflows/run-pipeline.yml +++ b/.github/workflows/run-pipeline.yml @@ -82,7 +82,7 @@ jobs: python -m snakemake ${{ (inputs.dry_run && '-n') || '' }} \ -j 2 --directory ${{inputs.pipeline_directory}} \ ${{ (endsWith(inputs.pipeline_config, 'ml') && '--configfile') || '' }} ${{ inputs.pipeline_config }} \ - --snakefile ${{inputs.pipeline_file}} --show-failed-logs -F ${{ inputs.pipeline_extra_flags }} + --snakefile ${{inputs.pipeline_file}} --show-failed-logs -F -p ${{ inputs.pipeline_extra_flags }} shell: micromamba-shell {0} - name: Run post pipeline cmd if: inputs.postrun_cmd