Skip to content

Commit

Permalink
bug fix
Browse files Browse the repository at this point in the history
  • Loading branch information
bfclarke committed Nov 23, 2023
1 parent 684eb23 commit 4ad34d1
Showing 1 changed file with 84 additions and 84 deletions.
168 changes: 84 additions & 84 deletions pipelines/training_association_testing.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ n_repeats = config['n_repeats']
debug = '--debug ' if debug_flag else ''
do_scoretest = '--do-scoretest ' if config.get('do_scoretest', False) else ''
tensor_compression_level = config['training'].get('tensor_compression_level', 1)
n_parallel_training_jobs = config["training"].get("n_parallel_jobs")
n_parallel_training_jobs = config["training"].get("n_parallel_jobs", 1)

wildcard_constraints:
repeat="\d+",
Expand Down Expand Up @@ -239,7 +239,7 @@ rule train:
expand('models/repeat_{repeat}/trial{trial_number}/config.yaml',
repeat=range(n_repeats), trial_number=range(n_trials)),
expand('models/repeat_{repeat}/trial{trial_number}/finished.tmp',
repeat=range(n_repeats), trial_number=range(n_trials)))
repeat=range(n_repeats), trial_number=range(n_trials))
params:
phenotypes = " ".join(
[f"--phenotype {p} "
Expand All @@ -248,7 +248,7 @@ rule train:
f"{p}/deeprvat/y.zarr"
for p in phenotypes])
shell:
f"parallel --jobs {n_parallel_training_jobs} --results train_repeat{{1}}_trial{{2}}/ "
f"parallel --jobs {n_parallel_training_jobs} --results train_repeat{{{{1}}}}_trial{{{{2}}}}/ "
'deeprvat_train train '
+ debug +
'--trial-id {{2}} '
Expand All @@ -257,89 +257,89 @@ rule train:
'models/repeat_{{1}}/trial{{2}} '
"models/repeat_{{1}}/hyperparameter_optimization.db '&&' "
"touch models/repeat_{{1}}/trial{{2}}/finished.tmp "
"::: " + " ".join(range(n_repeats)) + " "
"::: " + " ".join(range(n_trials))
"::: " + " ".join(map(str, range(n_repeats))) + " "
"::: " + " ".join(map(str, range(n_trials)))

rule all_training_dataset:
input:
input_tensor = expand('{phenotype}/deeprvat/input_tensor.zarr',
phenotype=phenotypes, repeat=range(n_repeats)),
covariates = expand('{phenotype}/deeprvat/covariates.zarr',
phenotype=phenotypes, repeat=range(n_repeats)),
y = expand('{phenotype}/deeprvat/y.zarr',
phenotype=phenotypes, repeat=range(n_repeats))
# rule all_training_dataset:
# input:
# input_tensor = expand('{phenotype}/deeprvat/input_tensor.zarr',
# phenotype=phenotypes, repeat=range(n_repeats)),
# covariates = expand('{phenotype}/deeprvat/covariates.zarr',
# phenotype=phenotypes, repeat=range(n_repeats)),
# y = expand('{phenotype}/deeprvat/y.zarr',
# phenotype=phenotypes, repeat=range(n_repeats))

rule training_dataset:
input:
config = '{phenotype}/deeprvat/hpopt_config.yaml',
training_dataset = '{phenotype}/deeprvat/training_dataset.pkl'
output:
input_tensor = directory('{phenotype}/deeprvat/input_tensor.zarr'),
covariates = directory('{phenotype}/deeprvat/covariates.zarr'),
y = directory('{phenotype}/deeprvat/y.zarr')
threads: 8
priority: 50
shell:
(
'deeprvat_train make-dataset '
+ debug +
'--compression-level ' + str(tensor_compression_level) + ' '
'--training-dataset-file {input.training_dataset} '
'{input.config} '
'{output.input_tensor} '
'{output.covariates} '
'{output.y}'
)
# rule training_dataset:
# input:
# config = '{phenotype}/deeprvat/hpopt_config.yaml',
# training_dataset = '{phenotype}/deeprvat/training_dataset.pkl'
# output:
# input_tensor = directory('{phenotype}/deeprvat/input_tensor.zarr'),
# covariates = directory('{phenotype}/deeprvat/covariates.zarr'),
# y = directory('{phenotype}/deeprvat/y.zarr')
# threads: 8
# priority: 50
# shell:
# (
# 'deeprvat_train make-dataset '
# + debug +
# '--compression-level ' + str(tensor_compression_level) + ' '
# '--training-dataset-file {input.training_dataset} '
# '{input.config} '
# '{output.input_tensor} '
# '{output.covariates} '
# '{output.y}'
# )

rule training_dataset_pickle:
input:
'{phenotype}/deeprvat/hpopt_config.yaml'
output:
'{phenotype}/deeprvat/training_dataset.pkl'
threads: 1
shell:
(
'deeprvat_train make-dataset '
'--pickle-only '
'--training-dataset-file {output} '
'{input} '
'dummy dummy dummy'
)
# rule training_dataset_pickle:
# input:
# '{phenotype}/deeprvat/hpopt_config.yaml'
# output:
# '{phenotype}/deeprvat/training_dataset.pkl'
# threads: 1
# shell:
# (
# 'deeprvat_train make-dataset '
# '--pickle-only '
# '--training-dataset-file {output} '
# '{input} '
# 'dummy dummy dummy'
# )

rule all_config:
input:
seed_genes = expand('{phenotype}/deeprvat/seed_genes.parquet',
phenotype=phenotypes),
config = expand('{phenotype}/deeprvat/hpopt_config.yaml',
phenotype=phenotypes),
baseline = expand('{phenotype}/deeprvat/baseline_results.parquet',
phenotype=phenotypes),
# rule all_config:
# input:
# seed_genes = expand('{phenotype}/deeprvat/seed_genes.parquet',
# phenotype=phenotypes),
# config = expand('{phenotype}/deeprvat/hpopt_config.yaml',
# phenotype=phenotypes),
# baseline = expand('{phenotype}/deeprvat/baseline_results.parquet',
# phenotype=phenotypes),

rule config:
input:
config = 'config.yaml',
baseline = lambda wildcards: [
str(Path(r['base']) / wildcards.phenotype / r['type'] /
'eval/burden_associations.parquet')
for r in config['baseline_results']
]
output:
seed_genes = '{phenotype}/deeprvat/seed_genes.parquet',
config = '{phenotype}/deeprvat/hpopt_config.yaml',
baseline = '{phenotype}/deeprvat/baseline_results.parquet',
threads: 1
params:
baseline_results = lambda wildcards, input: ''.join([
f'--baseline-results {b} '
for b in input.baseline
])
shell:
(
'deeprvat_config update-config '
'--phenotype {wildcards.phenotype} '
'{params.baseline_results}'
'--baseline-results-out {output.baseline} '
'--seed-genes-out {output.seed_genes} '
'{input.config} '
'{output.config}'
)
# rule config:
# input:
# config = 'config.yaml',
# baseline = lambda wildcards: [
# str(Path(r['base']) / wildcards.phenotype / r['type'] /
# 'eval/burden_associations.parquet')
# for r in config['baseline_results']
# ]
# output:
# seed_genes = '{phenotype}/deeprvat/seed_genes.parquet',
# config = '{phenotype}/deeprvat/hpopt_config.yaml',
# baseline = '{phenotype}/deeprvat/baseline_results.parquet',
# threads: 1
# params:
# baseline_results = lambda wildcards, input: ''.join([
# f'--baseline-results {b} '
# for b in input.baseline
# ])
# shell:
# (
# 'deeprvat_config update-config '
# '--phenotype {wildcards.phenotype} '
# '{params.baseline_results}'
# '--baseline-results-out {output.baseline} '
# '--seed-genes-out {output.seed_genes} '
# '{input.config} '
# '{output.config}'
# )

0 comments on commit 4ad34d1

Please sign in to comment.