-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adaptations for running with DNAnexus
- Loading branch information
Showing
8 changed files
with
526 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
from pathlib import Path | ||
from typing import Iterable, Union | ||
|
||
configfile: 'config.yaml' | ||
|
||
# Pipeline-wide settings, read from the Snakemake ``config`` mapping.
debug_flag = config.get('debug', False)

# ``phenotypes`` may be configured either as a mapping keyed by phenotype name
# or as a plain sequence of names; only the names are kept.
phenotypes = config['phenotypes']
phenotypes = list(phenotypes) if isinstance(phenotypes, dict) else phenotypes

# Debug runs always use 2 burden chunks, regardless of the configured value.
n_burden_chunks = config.get('n_burden_chunks', 1) if not debug_flag else 2
n_repeats = config['n_repeats']
# CLI fragment spliced into the commands built below; the trailing space is
# what separates it from the argument that follows.
debug = '--debug ' if debug_flag else ''
pretrained_model_path = Path(config.get("pretrained_model_path", "pretrained_models"))

# DNAnexus settings used as defaults by ``dx_run``; every key except
# ``priority`` is required.
dnanexus_destination = Path(config["dnanexus"]["destination"])
dnanexus_applet = config["dnanexus"]["applet"]
dnanexus_priority = config["dnanexus"].get("priority", "low")
dnanexus_configfile = config["dnanexus"]["configfile"]
|
||
|
||
def dx_run(
    command: str,
    mkdirs: Union[str, Iterable[str]],
    instance_type: str,
    dx_configfile: str = dnanexus_configfile,
    cost_limit: float = 1.00,
    destination: str = dnanexus_destination,
    applet: str = dnanexus_applet,
    dx_priority: str = dnanexus_priority,
) -> str:
    """Build the ``dx run`` shell command that runs ``command`` via an applet.

    The command handed to the applet first creates every directory in
    ``mkdirs`` (``mkdir -p``) and then runs ``command``; all steps are chained
    with ``&&``.

    Args:
        command: Shell command to execute after the directories exist.
        mkdirs: A single directory or an iterable of directories to create
            beforehand. May be empty, in which case only ``command`` runs.
        instance_type: Value passed to ``--instance-type``.
        dx_configfile: Value passed as the applet's ``config`` input.
        cost_limit: Value passed to ``--cost-limit``.
        destination: Value passed to ``--destination``.
        applet: Name of the applet to run.
        dx_priority: Value passed to ``--priority``.

    Returns:
        The complete ``dx run ...`` command line, ending in a trailing space.
    """
    # Function-scope import so the helper does not depend on the file header.
    import shlex

    if isinstance(mkdirs, str):
        mkdirs = [mkdirs]

    # Collecting the steps in a list avoids a dangling leading "&&" when no
    # directories are requested.
    steps = [f"mkdir -p {shlex.quote(str(d))}" for d in mkdirs]
    steps.append(command)
    remote_command = " && ".join(steps)

    # shlex.quote keeps the whole remote command a single shell word even if
    # it contains single quotes itself; for commands without quotes the result
    # is the same '...'-wrapped text as plain single-quoting.
    return (
        f"dx run {applet} "
        f"--instance-type {instance_type} "
        f"--priority {dx_priority} "
        f"--cost-limit {cost_limit} "
        f"-iconfig={dx_configfile} "
        f"-icommand={shlex.quote(remote_command)} "
        f"--destination {destination} "
        "--wait "
        "-y "
    )
|
||
# Both wildcards must consist of one or more digits. Raw strings keep ``\d``
# from being parsed as an (invalid) string escape sequence.
wildcard_constraints:
    repeat=r"\d+",
    trial=r"\d+",
|
||
|
||
# Default target: one ``.finished`` marker per phenotype and burden chunk.
rule all:
    input:
        expand("{phenotype}/deeprvat/burdens/chunk{chunk}.finished",
               phenotype=phenotypes,
               # expand() needs the chunk indices themselves; a bare int would
               # request a single target named after the chunk *count*.
               # NOTE(review): assumes chunk indices are 0-based - confirm
               # against `deeprvat_associate compute-burdens --chunk`.
               chunk=range(n_burden_chunks))
|
||
# Run ``deeprvat_associate compute-burdens`` for one chunk of one phenotype,
# then touch the ``.finished`` marker.
# NOTE(review): ``n_bags`` is not assigned anywhere in the visible part of this
# file - confirm it is defined before the ``checkpoints`` input is evaluated.
rule compute_burdens:
    priority: 10
    input:
        reversed = pretrained_model_path / "reverse_finished.tmp",
        # One checkpoint per (repeat, bag) combination.
        checkpoints = lambda wildcards: [
            pretrained_model_path / f'repeat_{r}' / 'best' / f'bag_{b}.ckpt'
            for r in range(n_repeats)
            for b in range(n_bags)
        ],
        dataset = '{phenotype}/deeprvat/association_dataset.pkl',
        data_config = '{phenotype}/deeprvat/hpopt_config.yaml',
        model_config = pretrained_model_path / 'config.yaml',
    output:
        '{phenotype}/deeprvat/burdens/chunk{chunk}.finished'
    threads: 8
    shell:
        ''.join([
            'deeprvat_associate compute-burdens ',
            debug,
            ' --n-chunks ', str(n_burden_chunks), ' ',
            '--chunk {wildcards.chunk} ',
            '--dataset-file {input.dataset} ',
            '{input.data_config} ',
            '{input.model_config} ',
            '{input.checkpoints} ',
            '{wildcards.phenotype}/deeprvat/burdens',
            ' && touch {output}',
        ])
|
||
# Aggregate target: the association dataset of every configured phenotype.
rule all_association_dataset:
    input:
        [f'{name}/deeprvat/association_dataset.pkl' for name in phenotypes]
|
||
# Build the association dataset for one phenotype by running
# ``deeprvat_associate make-dataset`` through ``dx_run``; the output file is
# touched after the ``dx run`` command has returned.
rule association_dataset:
    input:
        config = '{phenotype}/deeprvat/hpopt_config.yaml'
    output:
        '{phenotype}/deeprvat/association_dataset.pkl'
    threads: 1
    params:
        dx_run = lambda wildcards, input, output: dx_run(
            # The config is addressed below /mnt/project/DeepRVAT/<destination>,
            # while the output path is passed through unchanged.
            command="deeprvat_associate make-dataset {}{} {}".format(
                debug,
                Path("/mnt/project/DeepRVAT") / dnanexus_destination / str(input.config),
                output,
            ),
            mkdirs=f"{wildcards.phenotype}/deeprvat",
            instance_type="mem3_ssd1_v2_x4",
            cost_limit=1,
        ),
    shell:
        "{params.dx_run} && touch {output}"
|
||
# Aggregate target: the per-phenotype config of every configured phenotype.
rule all_config:
    input:
        config = [f'{name}/deeprvat/hpopt_config.yaml' for name in phenotypes],
|
||
# Derive the per-phenotype config from ``config.yaml`` by running
# ``deeprvat_config update-config`` through ``dx_run``; the output file is
# touched after the ``dx run`` command has returned.
rule config:
    input:
        config = 'config.yaml',
    output:
        config = '{phenotype}/deeprvat/hpopt_config.yaml',
    threads: 1
    params:
        dx_run = lambda wildcards, input, output: dx_run(
            command=" ".join([
                "deeprvat_config update-config",
                f"--phenotype {wildcards.phenotype}",
                str(input.config),
                str(output.config),
            ]),
            mkdirs=f"{wildcards.phenotype}/deeprvat",
            instance_type="mem1_ssd1_v2_x2",
            cost_limit=0.10,
        ),
    shell:
        "{params.dx_run} && touch {output}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
phenotypes: | ||
Calcium: | ||
correction_method: FDR | ||
n_training_genes: 100 | ||
baseline_phenotype: Calcium | ||
|
||
n_burden_chunks: 1 | ||
|
||
n_repeats: 6 | ||
|
||
do_scoretest: True | ||
|
||
dnanexus: | ||
configfile: DeepRVAT/workdir/pretrained_scoring/config.yaml | ||
destination: DeepRVAT/workdir/pretrained_scoring | ||
applet: deeprvat_burdens | ||
priority: low | ||
|
||
model: | ||
type: DeepSet | ||
model_collection: agg_models | ||
checkpoint: combined_agg.pt | ||
config: | ||
phi_layers: 2 | ||
phi_hidden_dim: 20 | ||
rho_layers: 3 | ||
rho_hidden_dim: 10 | ||
activation: LeakyReLU | ||
pool: max | ||
use_sigmoid: True | ||
metrics: | ||
objective: MSE | ||
objective_mode: min | ||
loss: MSE | ||
all: | ||
MSE: {} | ||
PearsonCorrTorch: {} | ||
MAE: {} | ||
RSquared: {} | ||
optimizer: | ||
type: AdamW | ||
config: {} | ||
|
||
data: | ||
gt_file: /mnt/project/DeepRVAT/DeepRVAT/data/preprocessed/genotypes.h5 | ||
variant_file: /mnt/project/DeepRVAT/DeepRVAT/data/variants.parquet | ||
dataset_config: | ||
min_common_af: | ||
MAF: 0.01 | ||
phenotype_file: /mnt/project/DeepRVAT/DeepRVAT/data/phenotypes.parquet | ||
y_transformation: quantile_transform | ||
x_phenotypes: | ||
- age | ||
- genetic_sex | ||
- genetic_PC_1 | ||
- genetic_PC_2 | ||
- genetic_PC_3 | ||
- genetic_PC_4 | ||
- genetic_PC_5 | ||
- genetic_PC_6 | ||
- genetic_PC_7 | ||
- genetic_PC_8 | ||
- genetic_PC_9 | ||
- genetic_PC_10 | ||
- genetic_PC_11 | ||
- genetic_PC_12 | ||
- genetic_PC_13 | ||
- genetic_PC_14 | ||
- genetic_PC_15 | ||
- genetic_PC_16 | ||
- genetic_PC_17 | ||
- genetic_PC_18 | ||
- genetic_PC_19 | ||
- genetic_PC_20 | ||
annotation_file: /mnt/project/DeepRVAT/DeepRVAT/data/annotations.parquet | ||
annotations: | ||
- MAF | ||
- MAF_MB | ||
- CADD_PHRED | ||
- CADD_raw | ||
- sift_score | ||
- polyphen_score | ||
- Consequence_splice_acceptor_variant | ||
- Consequence_splice_donor_variant | ||
- Consequence_stop_gained | ||
- Consequence_frameshift_variant | ||
- Consequence_stop_lost | ||
- Consequence_start_lost | ||
- Consequence_inframe_insertion | ||
- Consequence_inframe_deletion | ||
- Consequence_missense_variant | ||
- Consequence_protein_altering_variant | ||
- Consequence_splice_region_variant | ||
- condel_score | ||
- DeepSEA_PC_1 | ||
- DeepSEA_PC_2 | ||
- DeepSEA_PC_3 | ||
- DeepSEA_PC_4 | ||
- DeepSEA_PC_5 | ||
- DeepSEA_PC_6 | ||
- PrimateAI_score | ||
- AbSplice_DNA | ||
- DeepRipe_plus_QKI_lip_hg2 | ||
- DeepRipe_plus_QKI_clip_k5 | ||
- DeepRipe_plus_KHDRBS1_clip_k5 | ||
- DeepRipe_plus_ELAVL1_parclip | ||
- DeepRipe_plus_TARDBP_parclip | ||
- DeepRipe_plus_HNRNPD_parclip | ||
- DeepRipe_plus_MBNL1_parclip | ||
- DeepRipe_plus_QKI_parclip | ||
- SpliceAI_delta_score | ||
gene_file: /mnt/project/DeepRVAT/DeepRVAT/data/protein_coding_genes.parquet | ||
use_common_variants: False | ||
use_rare_variants: True | ||
rare_embedding: | ||
type: PaddedAnnotations | ||
config: | ||
annotations: | ||
- MAF_MB | ||
- CADD_raw | ||
- sift_score | ||
- polyphen_score | ||
- Consequence_splice_acceptor_variant | ||
- Consequence_splice_donor_variant | ||
- Consequence_stop_gained | ||
- Consequence_frameshift_variant | ||
- Consequence_stop_lost | ||
- Consequence_start_lost | ||
- Consequence_inframe_insertion | ||
- Consequence_inframe_deletion | ||
- Consequence_missense_variant | ||
- Consequence_protein_altering_variant | ||
- Consequence_splice_region_variant | ||
- condel_score | ||
- DeepSEA_PC_1 | ||
- DeepSEA_PC_2 | ||
- DeepSEA_PC_3 | ||
- DeepSEA_PC_4 | ||
- DeepSEA_PC_5 | ||
- DeepSEA_PC_6 | ||
- PrimateAI_score | ||
- AbSplice_DNA | ||
- DeepRipe_plus_QKI_lip_hg2 | ||
- DeepRipe_plus_QKI_clip_k5 | ||
- DeepRipe_plus_KHDRBS1_clip_k5 | ||
- DeepRipe_plus_ELAVL1_parclip | ||
- DeepRipe_plus_TARDBP_parclip | ||
- DeepRipe_plus_HNRNPD_parclip | ||
- DeepRipe_plus_MBNL1_parclip | ||
- DeepRipe_plus_QKI_parclip | ||
- SpliceAI_delta_score | ||
thresholds: | ||
MAF: "MAF < 1e-3" | ||
CADD_PHRED: "CADD_PHRED > 5" | ||
gene_file: /mnt/project/DeepRVAT/DeepRVAT/data/protein_coding_genes.parquet | ||
verbose: True | ||
low_memory: True | ||
verbose: True | ||
dataloader_config: | ||
batch_size: 16 | ||
num_workers: 10 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# DeepRvat-test Developer Readme | ||
|
||
<!-- | ||
TODO: Please edit this Readme.developer.md file to include information | ||
for developers or advanced users, for example: | ||
* Information about app internals and implementation details | ||
* How to report bugs or contribute to development | ||
--> | ||
|
||
## Running this app with additional computational resources | ||
|
||
This app has the following entry points: | ||
|
||
* main | ||
|
||
When running this app, you can override the instance type to be used by
providing the `systemRequirements` field to `/applet-XXXX/run` or
`/app-XXXX/run`, as follows:
|
||
    {
      systemRequirements: {
        "main": {"instanceType": "mem2_hdd2_x2"}
      },
      [...]
    }
|
||
See <a | ||
href="https://documentation.dnanexus.com/developer/api/running-analyses/io-and-run-specifications#run-specification">Run | ||
Specification</a> in the API documentation for more information about the | ||
available instance types. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
<!-- dx-header --> | ||
# DeepRVAT (DNAnexus Platform App) | ||
|
||
Rare variant association testing using deep learning and data-driven burden scores | ||
|
||
This is the source code for an app that runs on the DNAnexus Platform. | ||
For more information about how to run or modify it, see | ||
https://documentation.dnanexus.com/. | ||
<!-- /dx-header --> | ||
|
||
<!-- Insert a description of your app here --> | ||
|
||
<!-- | ||
TODO: This app directory was automatically generated by dx-app-wizard; | ||
please edit this Readme.md file to include essential documentation about | ||
your app that would be helpful to users. (Also see the | ||
Readme.developer.md.) Once you're done, you can remove these TODO | ||
comments. | ||
For more info, see https://documentation.dnanexus.com/developer. | ||
--> |
Oops, something went wrong.