Skip to content

Commit

Permalink
bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
bfclarke committed Mar 12, 2024
1 parent 742b09f commit 8deacd9
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 29 deletions.
42 changes: 18 additions & 24 deletions deeprvat/deeprvat/associate.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,11 +474,10 @@ def compute_burdens_(

def make_regenie_input_(
debug: bool,
skip_samples: bool,
skip_covariates: bool,
skip_phenotypes: bool,
skip_burdens: bool,
burdens_genes_samples: Optional[Path],
burdens_genes_samples: Optional[Tuple[Path, Path, Path]],
repeat: int,
average_repeats: bool,
phenotype: Tuple[Tuple[str, Path, Path, Path]],
Expand All @@ -494,8 +493,6 @@ def make_regenie_input_(
## Check options
if not skip_burdens and burdens_genes_samples is None:
raise ValueError("--burdens-genes must be specified if --skip-burdens is not")
if not skip_samples and sample_file is None:
raise ValueError("Either sample_file or skip_samples must be specified")
if not skip_covariates and covariate_file is None:
raise ValueError("Either covariate_file or skip_covariates must be specified")
if not skip_phenotypes and phenotype_file is None:
Expand Down Expand Up @@ -547,22 +544,6 @@ def make_regenie_input_(

sample_df = pd.DataFrame({"FID": sample_ids, "IID": sample_ids})

if not skip_samples:
## Make sample file
logger.info(f"Creating sample file {sample_file}")
samples_out = pd.concat(
[
pd.DataFrame({"ID_1": 0, "ID_2": 0}, index=[0]),
sample_df.rename(
columns={
"FID": "ID_1",
"IID": "ID_2",
}
),
]
)
samples_out.to_csv(sample_file, sep=" ", index=False)

if not skip_covariates:
## Make covariate file
logger.info(f"Creating covariate file {covariate_file}")
Expand Down Expand Up @@ -595,9 +576,25 @@ def make_regenie_input_(
) # Might be different from those for the phenotypes
n_samples = sample_ids.shape[0]

## Make sample file
logger.info(f"Creating sample file {sample_file}")
sample_df = pd.DataFrame({"FID": sample_ids, "IID": sample_ids})
samples_out = pd.concat(
[
pd.DataFrame({"ID_1": 0, "ID_2": 0}, index=[0]),
sample_df.rename(
columns={
"FID": "ID_1",
"IID": "ID_2",
}
),
]
)
samples_out.to_csv(sample_file, sep=" ", index=False)

burdens_zarr = zarr.open(burden_file)
if not debug:
# assert burdens_zarr.shape[0] == n_samples
assert burdens_zarr.shape[0] == n_samples
assert burdens_zarr.shape[1] == n_genes

if average_repeats:
Expand Down Expand Up @@ -657,7 +654,6 @@ def make_regenie_input_(

@cli.command()
@click.option("--debug", is_flag=True)
@click.option("--skip-samples", is_flag=True)
@click.option("--skip-covariates", is_flag=True)
@click.option("--skip-phenotypes", is_flag=True)
@click.option("--skip-burdens", is_flag=True)
Expand Down Expand Up @@ -690,7 +686,6 @@ def make_regenie_input_(
@click.argument("gtf", type=click.Path(exists=True, path_type=Path))
def make_regenie_input(
debug: bool,
skip_samples: bool,
skip_covariates: bool,
skip_phenotypes: bool,
skip_burdens: bool,
Expand All @@ -707,7 +702,6 @@ def make_regenie_input(
):
make_regenie_input_(
debug=debug,
skip_samples=skip_samples,
skip_covariates=skip_covariates,
skip_phenotypes=skip_phenotypes,
skip_burdens=skip_burdens,
Expand Down
10 changes: 9 additions & 1 deletion pipelines/association_testing/plot.snakefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
configfile: 'config.yaml'

# The configured phenotypes may be a mapping keyed by phenotype name or a
# plain collection of names; reduce a mapping to the list of its keys.
phenotypes = config['phenotypes']
# isinstance (rather than an exact type comparison) also recognises dict
# subclasses such as OrderedDict.
phenotypes = list(phenotypes.keys()) if isinstance(phenotypes, dict) else phenotypes
# Use the phenotypes above when the "training" section does not define its own.
training_phenotypes = config["training"].get("phenotypes", phenotypes)

n_repeats = config['n_repeats']

#requires that comparison_results.pkl is linked to the experiment directory
#requires deeprvat-analysis to be installed
DEEPRVAT_ANALYSIS_DIR = os.environ['DEEPRVAT_ANALYSIS_DIR']
Expand Down Expand Up @@ -43,4 +51,4 @@ rule compute_replication:
'--out-file {output} '
'--n-repeats {params.n_repeats} '
'{params.result_files} '
'./ '
'./ '
7 changes: 3 additions & 4 deletions pipelines/association_testing/regress_eval_regenie.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -239,20 +239,21 @@ rule make_regenie_burdens:
phenotypes = " ".join([f"--phenotype {p} {p}/deeprvat/association_dataset.pkl {p}/deeprvat/xy"
for p in phenotypes]) + " "
output:
sample_file = "regenie_input/deeprvat_pseudovariants.sample",
bgen = "regenie_input/deeprvat_pseudovariants.bgen",
threads: 8
resources:
mem_mb = 64000
shell:
"deeprvat_associate make-regenie-input "
+ debug +
"--skip-samples "
"--skip-covariates "
"--skip-phenotypes "
"--average-repeats "
"{params.phenotypes}"
"{params.phenotypes} "
# "{input.dataset} "
# "{wildcards.phenotype}/deeprvat/burdens "
"--sample-file {output.sample_file} "
"--bgen {output.bgen} "
"--burdens-genes-samples {input.burdens} {input.genes} {input.samples} "
"{input.gene_file} "
Expand All @@ -272,7 +273,6 @@ rule make_regenie_metadata:
f"{p}/deeprvat/xy"
for p in phenotypes]) + " "
output:
sample_file = "regenie_input/deeprvat_pseudovariants.sample",
covariate_file = "regenie_input/covariates.txt",
phenotype_file = "regenie_input/phenotypes.txt",
threads: 1
Expand All @@ -285,7 +285,6 @@ rule make_regenie_metadata:
"{params.phenotypes}"
# "{input.dataset} "
# "{wildcards.phenotype}/deeprvat/burdens "
"--sample-file {output.sample_file} "
"--covariate-file {output.covariate_file} "
"--phenotype-file {output.phenotype_file} "
"{input.gene_file} "
Expand Down

0 comments on commit 8deacd9

Please sign in to comment.