From 66dfd32d8ea9a068ec4c75af57acec982e64f078 Mon Sep 17 00:00:00 2001 From: Jean-Francois Pombert Date: Thu, 23 Dec 2021 12:42:37 -0600 Subject: [PATCH] 0.8.0; based on AlphaFold 2.1.0 --- Notes/Installation_notes.sh | 2 ++ Prediction/AlphaFold2/alphafold.pl | 24 ++++++++++++++++-------- README.md | 18 +++++++++++++----- run_3DFI.pl | 24 ++++++++++++++++++++---- setup_3DFI.pl | 14 ++++++++++---- 5 files changed, 61 insertions(+), 21 deletions(-) diff --git a/Notes/Installation_notes.sh b/Notes/Installation_notes.sh index fd428d0..deddfac 100644 --- a/Notes/Installation_notes.sh +++ b/Notes/Installation_notes.sh @@ -1,6 +1,8 @@ ## On Fedora 34 ##### Installing Aria2, Conda and Docker ##### sudo dnf install aria2 conda docker +sudo dnf install python-docker +sudo dnf install python-absl ## absl is required for alphafold ## Starting Docker / enabling at boot sudo systemctl start docker diff --git a/Prediction/AlphaFold2/alphafold.pl b/Prediction/AlphaFold2/alphafold.pl index 6ae7592..67a0f95 100755 --- a/Prediction/AlphaFold2/alphafold.pl +++ b/Prediction/AlphaFold2/alphafold.pl @@ -1,8 +1,8 @@ #!/usr/bin/perl ## Pombert Lab, Illinois Tech, 2021 my $name = 'alphafold.pl'; -my $version = '0.5'; ## to test with alphafold fork -my $updated = '2021-09-18'; +my $version = '0.5a'; ## Update to match Alphafold 2.1 cmd line switches +my $updated = '2021-12-23'; use strict; use warnings; @@ -34,7 +34,8 @@ -o (--outdir) Output directory -d (--docker) Docker image name [Default: alphafold_3dfi] -m (--max_date) --max_template_date option (YYYY-MM-DD) from AlphaFold2 [Default: current date] --p (--preset) Alphafold preset: full_dbs, reduced_dbs or casp14 [Default: full_dbs] +-p (--preset) Alphafold --db_preset: full_dbs or reduced_dbs [Default: full_dbs] +-u (--use_msas) Use precomputed MSAs -g (--gpu_dev) List of GPU devices to use: e.g. all; 0,1; 0,1,2,3 [Default: all] -n (--no_gpu) Turns off GPU acceleration -ah (--alpha_home) AlphaFold2 installation directory [Default: \$ALPHAFOLD_HOME] @@ -48,6 +49,7 @@ my $docker_image_name = 'alphafold_3dfi'; my $max_date = strftime("%F", localtime); my $preset = 'full_dbs'; +my $precomputed_msas; my $gpus = 'all'; my $no_gpu; my $alpha_home; @@ -58,6 +60,7 @@ 'd|docker=s' => $docker_image_name, 'm|max_date=s' => \$max_date, 'p|preset=s' => \$preset, + 'u|use_msas' => \$precomputed_msas, 'g|gpu_dev=s' => $gpus, 'n|no_gpu' => \$no_gpu, 'ah|alpha_home=s' => \$alpha_home, @@ -103,7 +106,7 @@ casp14 => '' ); unless (exists $presets{$preset}){ - die "Unrecognized AlphaFold2 preset. Please use full_dbs, reduced_dbs, or casp14\n"; + die "Unrecognized AlphaFold2 preset. Please use full_dbs or reduced_dbs\n"; } ### Checking output directory + creating log file @@ -115,7 +118,7 @@ print LOG "\nCOMMAND = $name @command\n"; print LOG "\nFolding started on $timestamp\n"; print LOG "\nSetting AlphaFold2 --max_template_date option to: $max_date\n\n"; -print "\nSetting AlphaFold2 options --preset to $preset and --max_template_date to $max_date\n"; +print "\nSetting AlphaFold2 options --db_preset to $preset, --max_template_date to $max_date, and --docker_image_name to $docker_image_name\n"; ### Running AlphaFold2 docker image my $prefix; @@ -135,23 +138,28 @@ print "\n$time: working on $fasta\n"; my $start = time; + ## Gpu check my $gpu_devices = "--gpu_devices=$gpus"; - my $gpu_check = ''; if ($no_gpu){ $gpu_check = '--use_gpu=False'; $gpu_devices = ''; } + ## MSA + my $msa = 'False'; + if ($precomputed_msas){ $msa = 'True'; } + # Folding system "python3 \\ $alpha_home/docker/run_docker.py \\ --fasta_paths=$fasta \\ --docker_image_name=$docker_image_name \\ - --download_dir=$alpha_db \\ + --data_dir=$alpha_db \\ --output_dir=$outdir \\ --max_template_date=$max_date \\ - --preset=$preset \\ + --db_preset=$preset \\ + --use_precomputed_msas=$msa \\ $gpu_devices \\ $gpu_check "; diff --git a/README.md b/README.md index 5b429f9..5afa6b7 100755 --- a/README.md +++ b/README.md @@ -194,6 +194,9 @@ cd 3DFI/ -pyr (--pyrosetta) PyRosetta4 [Python-3.7.Release] .tar.bz2 archive to install # Download - https://www.pyrosetta.org/downloads#h.xe4c0yjfkl19 # License - https://els2.comotion.uw.edu/product/pyrosetta + +## Docker +-name (--docker_image) Name of the AlphaFold docker image to build [Default: alphafold_3dfi] ``` @@ -336,12 +339,16 @@ run_3DFI.pl \ ## 3D Folding options -n (--nogpu) ALPHAFOLD/ROSETTAFOLD: Turn off GPU acceleration / use CPU only +-g (--gpu_dev) ALPHAFOLD: list of GPU devices to use: e.g. all; 0,1; 0,1,2,3 [Default: all] -m (--maxdate) ALPHAFOLD: --max_template_date option (YYYY-MM-DD) [Default: current date] --k (--ranks) RAPTORX: Number of top ranks to model [Default: 5] +-s (--preset) ALPHAFOLD: full_dbs or reduced_dbs [Default: full_dbs] +-i (--docker_image) ALPHAFOLD: docker image name [Default: alphafold_3dfi] +-u (--use_msas) ALPHAFOLD: Use precomputed MSAs +-k (--ranks) RAPTORX: \# Number of top ranks to model [Default: 5] --modeller RAPTORX: Modeller version [Default: mod10.1] ## Structural homology / alignment --d (--db) 3DFI database location containing the RCSB PDB files / GESAMT archive [Default: $TDFI_DB] +-d (--db) 3DFI database location containing the RCSB PDB files / GESAMT archive [Default: \$TDFI_DB] -q (--qscore) Mininum Q-score to keep [Default: 0.3] -b (--best) Keep the best match(es) only (top X hits) [Default: 5] --query Models to query per protein and predictor: all or best [Default: all] @@ -712,11 +719,12 @@ alphafold.pl \ -o (--outdir) Output directory -d (--docker) Docker image name [Default: alphafold_3dfi] -m (--max_date) --max_template_date option (YYYY-MM-DD) from AlphaFold2 [Default: current date] --p (--preset) Alphafold preset: full_dbs, reduced_dbs or casp14 [Default: full_dbs] +-p (--preset) Alphafold --db_preset: full_dbs or reduced_dbs [Default: full_dbs] +-u (--use_msas) Use precomputed MSAs -g (--gpu_dev) List of GPU devices to use: e.g. all; 0,1; 0,1,2,3 [Default: all] -n (--no_gpu) Turns off GPU acceleration --ah (--alpha_home) AlphaFold2 installation directory [Default: $ALPHAFOLD_HOME] --ad (--alpha_db) AlphaFold2 databases location [Default: $TDFI_DB/ALPHAFOLD] +-ah (--alpha_home) AlphaFold2 installation directory [Default: \$ALPHAFOLD_HOME] +-ad (--alpha_db) AlphaFold2 databases location [Default: \$TDFI_DB/ALPHAFOLD] ``` diff --git a/run_3DFI.pl b/run_3DFI.pl index 5310f0f..4308d32 100755 --- a/run_3DFI.pl +++ b/run_3DFI.pl @@ -1,8 +1,8 @@ #!/usr/bin/perl ## Pombert Lab, Illinois Tech, 2021 my $name = 'run_3DFI.pl'; -my $version = '0.4b'; -my $updated = '2021-12-21'; +my $version = '0.5'; +my $updated = '2021-12-23'; use strict; use warnings; @@ -46,7 +46,9 @@ -n (--nogpu) ALPHAFOLD/ROSETTAFOLD: Turn off GPU acceleration / use CPU only -g (--gpu_dev) ALPHAFOLD: list of GPU devices to use: e.g. all; 0,1; 0,1,2,3 [Default: all] -m (--maxdate) ALPHAFOLD: --max_template_date option (YYYY-MM-DD) [Default: current date] ---preset ALPHAFOLD: full_dbs, reduced_dbs or casp14 [Default: full_dbs] +-s (--preset) ALPHAFOLD: full_dbs or reduced_dbs [Default: full_dbs] +-i (--docker_image) ALPHAFOLD: docker image name [Default: alphafold_3dfi] +-u (--use_msas) ALPHAFOLD: Use precomputed MSAs -k (--ranks) RAPTORX: # Number of top ranks to model [Default: 5] --modeller RAPTORX: Modeller version [Default: mod10.1] @@ -77,6 +79,8 @@ my $gpus = 'all'; my $maxdate; my $preset = 'full_dbs'; +my $docker_image = 'alphafold_3dfi'; +my $precomputed_msas; my $ranks = 5; my $modeller = 'mod10.1'; @@ -107,8 +111,11 @@ # 3D folding 'n|nogpu' => \$nogpu, 'g|gpu_dev=s' => $gpus, - 'k|ranks=i' => \$ranks, 'm|maxdate=s' => \$maxdate, + 's|preset=s' => \$preset, + 'i|docker_image=s' => \$docker_image, + 'u|use_msas' => \$precomputed_msas, + 'k|ranks=i' => \$ranks, 'modeller=s' => \$modeller, # Structural homology @@ -364,19 +371,28 @@ $pred_scripts_home = "$home_3DFI".'/Prediction/AlphaFold2/'; ## Checking options + # GPU my $gpu_devices = "--gpu_dev $gpus"; if ($nogpu) { $gpu_devices = '--no_gpu'; } + + # Maxdate my $maxdate_flag = ''; if ($maxdate) { $maxdate_flag = "--max_date $maxdate"; } + # Use precomputed MSAs + my $msas_flag = ''; + if ($precomputed_msas){ $msas_flag = "--use_msas"; } + ## Running alphafold $time = localtime; print "\n# $time: Running AlphaFold protein structure prediction\n"; system "$pred_scripts_home"."alphafold.pl \\ --fasta $fasta_dir/*.fasta \\ --preset $preset \\ + --docker $docker_image \\ $gpu_devices \\ $maxdate_flag \\ + $msas_flag \\ -o $af_dir"; ## Parsing AlphaFold output folders diff --git a/setup_3DFI.pl b/setup_3DFI.pl index 6159a3d..2e958a5 100755 --- a/setup_3DFI.pl +++ b/setup_3DFI.pl @@ -35,7 +35,11 @@ -i (--install) 3D structure predictor(s) to install (alphafold raptorx and/or rosettafold) -pyr (--pyrosetta) PyRosetta4 [Python-3.7.Release] .tar.bz2 archive to install # Download - https://www.pyrosetta.org/downloads#h.xe4c0yjfkl19 - # License - https://els2.comotion.uw.edu/product/pyrosetta + # License - https://els2.comotion.uw.edu/product/pyrosetta + +## Docker +-name (--docker_image) Name of the AlphaFold docker image to build [Default: alphafold_3dfi] + OPTIONS die "\n$usage\n" unless @ARGV; @@ -45,13 +49,15 @@ my $database; my @predictors; my $pyrosetta; +my $docker_image = 'alphafold_3dfi'; GetOptions( 'c|config=s' => \$config_file, 'w|write=s' => \$write, 'p|path=s' => \$path_3DFI, 'd|dbdir=s' => \$database, 'i|install=s@{1,}' => \@predictors, - 'pyr|pyrosetta=s' => \$pyrosetta + 'pyr|pyrosetta=s' => \$pyrosetta, + 'name|docker_image=s' => \$docker_image ); ###################################################### @@ -212,9 +218,9 @@ else { system "git clone $alphafold_git"; } # Creating Docker image + pip install of reqs - print "\nCreating AlphaFold docker image named alphafold_3dfi\n"; + print "\nCreating AlphaFold docker image named $docker_image\n"; chdir "$root_3D/alphafold/"; - system "docker build -f $root_3D/alphafold/docker/Dockerfile -t alphafold_3dfi ."; + system "docker build -f $root_3D/alphafold/docker/Dockerfile -t $docker_image ."; # Creating a pip location for AlphaFold requirements unless (-d $pip_location){