Skip to content

Commit

Permalink
adaptations for running with DNANexus
Browse files Browse the repository at this point in the history
  • Loading branch information
bfclarke committed Sep 20, 2023
1 parent 284eb25 commit 51c21dd
Show file tree
Hide file tree
Showing 8 changed files with 526 additions and 4 deletions.
10 changes: 6 additions & 4 deletions deeprvat/deeprvat/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,19 @@ def update_config(
new_config_file: str,
):
if seed_gene_dir is None and len(baseline_results) == 0:
raise ValueError(
"One of --seed-gene-dir and --baseline-results " "must be specified"
)
logger.warning("Neither --seed-gene-dir nor --baseline-results "
"were specified. This will result in errors if "
"when trying to train DeepRVAT using the resulting "
"config files.")

with open(old_config_file) as f:
config = yaml.safe_load(f)

if phenotype is not None:
logger.info(f"Updating config for phenotype {phenotype}")
config["data"]["dataset_config"]["y_phenotypes"] = [phenotype]
config["training_data"]["dataset_config"]["y_phenotypes"] = [phenotype]
if "training_data" in config:
config["training_data"]["dataset_config"]["y_phenotypes"] = [phenotype]

# For using seed genes from results of baseline methods
if len(baseline_results) > 0:
Expand Down
144 changes: 144 additions & 0 deletions dnanexus/association_testing_pretrained.snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
from pathlib import Path
from typing import Iterable, Union

configfile: 'config.yaml'

# ---------------------------------------------------------------------------
# Pipeline-wide settings, all read from the Snakemake `config` mapping.
# ---------------------------------------------------------------------------
debug_flag = config.get('debug', False)

# `phenotypes` may be written either as a mapping (phenotype name -> options)
# or as a plain list of names; only the names are used by the rules below.
# isinstance() (rather than an exact type comparison) also accepts dict
# subclasses such as OrderedDict.
phenotypes = config['phenotypes']
if isinstance(phenotypes, dict):
    phenotypes = list(phenotypes)

# Debug runs always use 2 burden chunks, whatever the config says.
n_burden_chunks = config.get('n_burden_chunks', 1) if not debug_flag else 2
n_repeats = config['n_repeats']
# CLI flag spliced verbatim into deeprvat_* commands; the trailing space
# separates it from the argument that follows.
debug = '--debug ' if debug_flag else ''
pretrained_model_path = Path(config.get("pretrained_model_path", "pretrained_models"))

# DNAnexus job settings; these are the default argument values of dx_run().
dnanexus_destination = Path(config["dnanexus"]["destination"])
dnanexus_applet = config["dnanexus"]["applet"]
dnanexus_priority = config["dnanexus"].get("priority", "low")
dnanexus_configfile = config["dnanexus"]["configfile"]


def dx_run(
    command: str,
    mkdirs: Union[str, Iterable[str]],
    instance_type: str,
    dx_configfile: str = dnanexus_configfile,
    cost_limit: float = 1.00,
    destination: Union[str, Path] = dnanexus_destination,
    applet: str = dnanexus_applet,
    dx_priority: str = dnanexus_priority,
) -> str:
    """Build the ``dx run`` shell command line that runs ``command`` in an applet.

    The command passed to the applet (``-icommand=...``) first creates every
    directory in ``mkdirs`` with ``mkdir -p`` and then runs ``command``; the
    steps are chained with ``&&``.

    Args:
        command: Shell command to pass to the applet.
        mkdirs: One directory (as a string) or an iterable of directories to
            create before ``command`` runs. May be empty.
        instance_type: Value for ``--instance-type``.
        dx_configfile: Value for the applet's ``-iconfig=`` input.
        cost_limit: Value for ``--cost-limit``.
        destination: Value for ``--destination``.
        applet: Name of the applet given to ``dx run``.
        dx_priority: Value for ``--priority``.

    Returns:
        The complete ``dx run ... --wait -y `` command line as a single
        string (with a trailing space, as callers join it into larger shell
        commands).
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list.
    import shlex

    if isinstance(mkdirs, str):
        mkdirs = [mkdirs]

    # Collect the steps in a list so that an empty `mkdirs` does not leave a
    # dangling leading " && " (which would be a shell syntax error).
    remote_steps = [f"mkdir -p {d}" for d in mkdirs]
    remote_steps.append(command)
    remote_command = " && ".join(remote_steps)

    dx_args = [
        f"dx run {applet}",
        f"--instance-type {instance_type}",
        f"--priority {dx_priority}",
        f"--cost-limit {cost_limit}",
        f"-iconfig={dx_configfile}",
        # shlex.quote() yields the same '...'-wrapped text as plain
        # single-quoting for ordinary commands, but also escapes any single
        # quote embedded in `command` instead of letting it end the argument.
        f"-icommand={shlex.quote(remote_command)}",
        f"--destination {destination}",
        "--wait",
        "-y",
    ]

    return " ".join(dx_args) + " "

# Restrict the numeric wildcards to digit sequences.
# Raw strings keep "\d" as a regex escape; in a normal string literal it is an
# invalid Python escape sequence and triggers a warning on recent Pythons.
wildcard_constraints:
    repeat=r"\d+",
    trial=r"\d+",


# Default target: one ".finished" marker per phenotype and burden chunk.
# Chunks are enumerated with range(), so all n_burden_chunks files are
# requested (passing the bare integer would request only "chunk<N>").
# NOTE(review): assumes `--chunk` in compute_burdens is zero-based
# (0 .. n_burden_chunks - 1) — confirm against deeprvat_associate.
rule all:
    input:
        expand("{phenotype}/deeprvat/burdens/chunk{chunk}.finished",
               phenotype=phenotypes,
               chunk=range(n_burden_chunks))

# Run `deeprvat_associate compute-burdens` for one chunk of one phenotype
# using the pretrained checkpoints, then touch the ".finished" marker file.
# Unlike the dataset/config rules in this file, the command is executed
# directly by this rule's shell rather than being wrapped in dx_run().
rule compute_burdens:
priority: 10
input:
# `reversed` is not referenced by the shell command; it only acts as a
# dependency on the "reverse_finished.tmp" file.
reversed = pretrained_model_path / "reverse_finished.tmp",
# NOTE(review): `n_bags` is not assigned anywhere in the visible part of this
# Snakefile, so evaluating this input function would raise NameError —
# confirm where the value should come from (e.g. the config).
checkpoints = lambda wildcards: [
pretrained_model_path / f'repeat_{repeat}/best/bag_{bag}.ckpt'
for repeat in range(n_repeats) for bag in range(n_bags)
],
dataset = '{phenotype}/deeprvat/association_dataset.pkl',
data_config = '{phenotype}/deeprvat/hpopt_config.yaml',
model_config = pretrained_model_path / 'config.yaml',
output:
'{phenotype}/deeprvat/burdens/chunk{chunk}.finished'
threads: 8
shell:
# The adjacent string literals below are concatenated into one command;
# each piece therefore carries its own trailing space as a separator.
' && '.join([
('deeprvat_associate compute-burdens '
+ debug +
' --n-chunks '+ str(n_burden_chunks) + ' '
'--chunk {wildcards.chunk} '
'--dataset-file {input.dataset} '
'{input.data_config} '
'{input.model_config} '
'{input.checkpoints} '
'{wildcards.phenotype}/deeprvat/burdens'),
'touch {output}'
])

# Aggregate target: requests the association dataset pickle of every phenotype.
rule all_association_dataset:
input:
expand('{phenotype}/deeprvat/association_dataset.pkl',
phenotype=phenotypes)

# Build the association dataset for one phenotype by submitting
# `deeprvat_associate make-dataset` through dx_run(); after the `dx run`
# command returns, this rule's shell touches the output path.
rule association_dataset:
input:
config = '{phenotype}/deeprvat/hpopt_config.yaml'
output:
'{phenotype}/deeprvat/association_dataset.pkl'
threads: 1
params:
# Evaluated per job so the command can embed the concrete input/output paths.
dx_run = lambda wildcards, input, output: dx_run(
command=(
'deeprvat_associate make-dataset '
+ debug +
# The config path is prefixed with /mnt/project/DeepRVAT/<destination>/.
# The trailing space inside the f-string is deliberate: it separates the
# config path from the output path in the final command line.
str("/mnt/project/DeepRVAT" / dnanexus_destination / f'{input.config} ') +
f'{output}'
),
mkdirs=f"{wildcards.phenotype}/deeprvat",
instance_type="mem3_ssd1_v2_x4",
cost_limit=1,
),
shell:
" && ".join([
"{params.dx_run}",
"touch {output}"
])

# Aggregate target: requests the per-phenotype hpopt_config.yaml of every phenotype.
rule all_config:
input:
config = expand('{phenotype}/deeprvat/hpopt_config.yaml',
phenotype=phenotypes),

# Derive the per-phenotype config from the top-level config.yaml by submitting
# `deeprvat_config update-config --phenotype <phenotype>` through dx_run();
# after the `dx run` command returns, this rule's shell touches the output path.
rule config:
input:
config = 'config.yaml',
output:
config = '{phenotype}/deeprvat/hpopt_config.yaml',
params:
# Evaluated per job so the command can embed the wildcard and file paths.
dx_run = lambda wildcards, input, output: dx_run(
command=(
'deeprvat_config update-config '
f'--phenotype {wildcards.phenotype} '
f'{input.config} '
f'{output.config}'
),
mkdirs=f"{wildcards.phenotype}/deeprvat",
instance_type="mem1_ssd1_v2_x2",
cost_limit=0.10,
),
threads: 1
shell:
" && ".join([
"{params.dx_run}",
"touch {output}"
])
161 changes: 161 additions & 0 deletions dnanexus/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
phenotypes:
Calcium:
correction_method: FDR
n_training_genes: 100
baseline_phenotype: Calcium

n_burden_chunks: 1

n_repeats: 6

do_scoretest: True

dnanexus:
configfile: DeepRVAT/workdir/pretrained_scoring/config.yaml
destination: DeepRVAT/workdir/pretrained_scoring
applet: deeprvat_burdens
priority: low

model:
type: DeepSet
model_collection: agg_models
checkpoint: combined_agg.pt
config:
phi_layers: 2
phi_hidden_dim: 20
rho_layers: 3
rho_hidden_dim: 10
activation: LeakyReLU
pool: max
use_sigmoid: True
metrics:
objective: MSE
objective_mode: min
loss: MSE
all:
MSE: {}
PearsonCorrTorch: {}
MAE: {}
RSquared: {}
optimizer:
type: AdamW
config: {}

data:
gt_file: /mnt/project/DeepRVAT/DeepRVAT/data/preprocessed/genotypes.h5
variant_file: /mnt/project/DeepRVAT/DeepRVAT/data/variants.parquet
dataset_config:
min_common_af:
MAF: 0.01
phenotype_file: /mnt/project/DeepRVAT/DeepRVAT/data/phenotypes.parquet
y_transformation: quantile_transform
x_phenotypes:
- age
- genetic_sex
- genetic_PC_1
- genetic_PC_2
- genetic_PC_3
- genetic_PC_4
- genetic_PC_5
- genetic_PC_6
- genetic_PC_7
- genetic_PC_8
- genetic_PC_9
- genetic_PC_10
- genetic_PC_11
- genetic_PC_12
- genetic_PC_13
- genetic_PC_14
- genetic_PC_15
- genetic_PC_16
- genetic_PC_17
- genetic_PC_18
- genetic_PC_19
- genetic_PC_20
annotation_file: /mnt/project/DeepRVAT/DeepRVAT/data/annotations.parquet
annotations:
- MAF
- MAF_MB
- CADD_PHRED
- CADD_raw
- sift_score
- polyphen_score
- Consequence_splice_acceptor_variant
- Consequence_splice_donor_variant
- Consequence_stop_gained
- Consequence_frameshift_variant
- Consequence_stop_lost
- Consequence_start_lost
- Consequence_inframe_insertion
- Consequence_inframe_deletion
- Consequence_missense_variant
- Consequence_protein_altering_variant
- Consequence_splice_region_variant
- condel_score
- DeepSEA_PC_1
- DeepSEA_PC_2
- DeepSEA_PC_3
- DeepSEA_PC_4
- DeepSEA_PC_5
- DeepSEA_PC_6
- PrimateAI_score
- AbSplice_DNA
- DeepRipe_plus_QKI_lip_hg2
- DeepRipe_plus_QKI_clip_k5
- DeepRipe_plus_KHDRBS1_clip_k5
- DeepRipe_plus_ELAVL1_parclip
- DeepRipe_plus_TARDBP_parclip
- DeepRipe_plus_HNRNPD_parclip
- DeepRipe_plus_MBNL1_parclip
- DeepRipe_plus_QKI_parclip
- SpliceAI_delta_score
gene_file: /mnt/project/DeepRVAT/DeepRVAT/data/protein_coding_genes.parquet
use_common_variants: False
use_rare_variants: True
rare_embedding:
type: PaddedAnnotations
config:
annotations:
- MAF_MB
- CADD_raw
- sift_score
- polyphen_score
- Consequence_splice_acceptor_variant
- Consequence_splice_donor_variant
- Consequence_stop_gained
- Consequence_frameshift_variant
- Consequence_stop_lost
- Consequence_start_lost
- Consequence_inframe_insertion
- Consequence_inframe_deletion
- Consequence_missense_variant
- Consequence_protein_altering_variant
- Consequence_splice_region_variant
- condel_score
- DeepSEA_PC_1
- DeepSEA_PC_2
- DeepSEA_PC_3
- DeepSEA_PC_4
- DeepSEA_PC_5
- DeepSEA_PC_6
- PrimateAI_score
- AbSplice_DNA
- DeepRipe_plus_QKI_lip_hg2
- DeepRipe_plus_QKI_clip_k5
- DeepRipe_plus_KHDRBS1_clip_k5
- DeepRipe_plus_ELAVL1_parclip
- DeepRipe_plus_TARDBP_parclip
- DeepRipe_plus_HNRNPD_parclip
- DeepRipe_plus_MBNL1_parclip
- DeepRipe_plus_QKI_parclip
- SpliceAI_delta_score
thresholds:
MAF: "MAF < 1e-3"
CADD_PHRED: "CADD_PHRED > 5"
gene_file: /mnt/project/DeepRVAT/DeepRVAT/data/protein_coding_genes.parquet
verbose: True
low_memory: True
verbose: True
dataloader_config:
batch_size: 16
num_workers: 10
31 changes: 31 additions & 0 deletions dnanexus/deeprvat_burdens/Readme.developer.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# DeepRVAT Developer Readme

<!--
TODO: Please edit this Readme.developer.md file to include information
for developers or advanced users, for example:
* Information about app internals and implementation details
* How to report bugs or contribute to development
-->

## Running this app with additional computational resources

This app has the following entry points:

* main

When running this app, you can override the instance type to be used by
providing the ``systemRequirements`` field to ```/applet-XXXX/run``` or
```/app-XXXX/run```, as follows:

{
systemRequirements: {
"main": {"instanceType": "mem2_hdd2_x2"}
},
[...]
}

See <a
href="https://documentation.dnanexus.com/developer/api/running-analyses/io-and-run-specifications#run-specification">Run
Specification</a> in the API documentation for more information about the
available instance types.
21 changes: 21 additions & 0 deletions dnanexus/deeprvat_burdens/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<!-- dx-header -->
# DeepRVAT (DNAnexus Platform App)

Rare variant association testing using deep learning and data-driven burden scores

This is the source code for an app that runs on the DNAnexus Platform.
For more information about how to run or modify it, see
https://documentation.dnanexus.com/.
<!-- /dx-header -->

<!-- Insert a description of your app here -->

<!--
TODO: This app directory was automatically generated by dx-app-wizard;
please edit this Readme.md file to include essential documentation about
your app that would be helpful to users. (Also see the
Readme.developer.md.) Once you're done, you can remove these TODO
comments.
For more info, see https://documentation.dnanexus.com/developer.
-->
Loading

0 comments on commit 51c21dd

Please sign in to comment.