From 4efe3f31b145de71d8dc46a3f340e56dd078f664 Mon Sep 17 00:00:00 2001 From: darcy220606 Date: Thu, 27 Oct 2022 14:53:42 +0200 Subject: [PATCH 01/15] Add interactive html R --- shinyapp/HTML.R | 64 +++++++++++++++++++ shinyapp/{shinyapp_html_file.R => shinyapp.R} | 9 ++- 2 files changed, 68 insertions(+), 5 deletions(-) create mode 100644 shinyapp/HTML.R rename shinyapp/{shinyapp_html_file.R => shinyapp.R} (95%) diff --git a/shinyapp/HTML.R b/shinyapp/HTML.R new file mode 100644 index 0000000..ee68145 --- /dev/null +++ b/shinyapp/HTML.R @@ -0,0 +1,64 @@ +#!/usr/bin/env Rscript + +############################## +# Rscript to visualise the complete summary tables generated by AMPcombi #### +############################## +# Date #### +# October, 19 2022 +############################## +# Authors #### +# Anan Ibrahim +# ananhamido@hotmail.com +# @darcy220606 +############################## +# Working_directory #### +############################## +setwd("/home/aibrahim/github/testing_ampcombi_on_deepevo") +############################## +# Libraries used + arguments #### +if (!require("dplyr")) install.packages('dplyr') +if (!require("DT")) install.packages('DT') +if (!require("optparse")) install.packages('optparse') + +library("dplyr") +library("DT") +library("optparse") +library(htmlwidgets) +# option_list = list( +# make_option(c("-f", "--file"), type="character", default=NULL, +# help="AMpcombi complete summary table", metavar="character"), +# make_option(c("-o", "--out"), type="character", default="AMPcombi_summary.html", +# help="Provide the name of the output file [default= %default]", metavar="character") +# ); +# +# opt_parser = OptionParser(option_list=option_list); +# opt = parse_args(opt_parser); + +############################## +#Generate HTML interactive files #### +table <- + readr::read_csv("ECO004-CDC007.csv") %>% + unique() + +result<-datatable(table, + class = 'cell-border stripe', ## add column border + options = list( paging = TRUE, ## paginate 
the output + pageLength = 100, ## number of rows to output for each page + scrollX = TRUE, ## enable scrolling on X axis + scrollY = TRUE, ## enable scrolling on Y axis + autoWidth = TRUE, ## use smart column width handling + #width = 20, + server = TRUE, ## use client-side processing only load the 100 on display + dom = 'Bfrtip', + #bordered = TRUE, + buttons = c('csv', 'excel'), ## the user can just download what on display because server=TRUE + columnDefs = list(list(targets = '_all', className = 'dt-center',width = '20px'), + list(targets = c(0, 8, 9), visible = TRUE))), + extensions = 'Buttons', + selection = 'multiple', ## enable selection of a single row + filter = 'top', ## include column filters at the bottom + rownames = FALSE ## don't show row numbers/names + ) + +htmlwidgets::saveWidget(result, "result.html") +############################## diff --git a/shinyapp/shinyapp_html_file.R b/shinyapp/shinyapp.R similarity index 95% rename from shinyapp/shinyapp_html_file.R rename to shinyapp/shinyapp.R index 75784cf..3c42865 100644 --- a/shinyapp/shinyapp_html_file.R +++ b/shinyapp/shinyapp.R @@ -1,7 +1,9 @@ #!/usr/bin/env Rscript +#Rscript shinyapp/shinyapp_html_file.R -f ../testing_ampcombi_on_deepevo/ECO004-CDC007.csv + ############################## -# Rscript to visualise the complete summary tables generated by AMPcombi #### +# Shinyapp to visualise the complete summary tables generated by AMPcombi #### ############################## # Date #### # October, 19 2022 @@ -21,8 +23,7 @@ library(DT) library(shiny) library(data.table) library(ggplot2) -library("optparse") -library(htmlwidgets) +library(optparse) option_list = list( make_option(c("-f", "--file"), type="character", default=NULL, @@ -37,8 +38,6 @@ opt = parse_args(opt_parser); ############################## # Generate the html file #### ############################## -#args[2] = "AMPcombi_summary.html" - table <- readr::read_csv(opt$file) %>% unique() From 4d2c55544513e84c51a1bb08d11af777476b87bb 
Mon Sep 17 00:00:00 2001 From: darcy220606 Date: Thu, 27 Oct 2022 14:57:21 +0200 Subject: [PATCH 02/15] Add HTML rscript --- shinyapp/HTML.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/shinyapp/HTML.R b/shinyapp/HTML.R index ee68145..9d58bf5 100644 --- a/shinyapp/HTML.R +++ b/shinyapp/HTML.R @@ -46,14 +46,14 @@ result<-datatable(table, pageLength = 100, ## number of rows to output for each page scrollX = TRUE, ## enable scrolling on X axis scrollY = TRUE, ## enable scrolling on Y axis - autoWidth = TRUE, ## use smart column width handling + #autoWidth = TRUE, ## use smart column width handling #width = 20, server = TRUE, ## use client-side processing only load the 100 on display dom = 'Bfrtip', #bordered = TRUE, buttons = c('csv', 'excel'), ## the user can just download what on display because server=TRUE - columnDefs = list(list(targets = '_all', className = 'dt-center',width = '20px'), - list(targets = c(0, 8, 9), visible = TRUE))), + columnDefs = list(list(targets = '_all', className = 'dt-center',width = '20px') + )), extensions = 'Buttons', selection = 'multiple', ## enable selection of a single row filter = 'top', ## include column filters at the bottom From 83808ae97a149dd689302d34d3bf997127af8daa Mon Sep 17 00:00:00 2001 From: darcy220606 Date: Thu, 27 Oct 2022 16:11:42 +0200 Subject: [PATCH 03/15] Add HTML.R --- shinyapp/HTML.R | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/shinyapp/HTML.R b/shinyapp/HTML.R index 9d58bf5..cdc022a 100644 --- a/shinyapp/HTML.R +++ b/shinyapp/HTML.R @@ -24,20 +24,21 @@ library("dplyr") library("DT") library("optparse") library(htmlwidgets) -# option_list = list( -# make_option(c("-f", "--file"), type="character", default=NULL, -# help="AMpcombi complete summary table", metavar="character"), -# make_option(c("-o", "--out"), type="character", default="AMPcombi_summary.html", -# help="Provide the name of the output file [default= %default]", 
metavar="character") -# ); -# -# opt_parser = OptionParser(option_list=option_list); -# opt = parse_args(opt_parser); + +option_list = list( + make_option(c("-f", "--file"), type="character", default="AMPcombi_summary.csv", + help="AMpcombi complete summary table [default= %default]", metavar="character"), + make_option(c("-o", "--out"), type="character", default="AMPcombi_summary.html", + help="Provide the name of the output file [default= %default]", metavar="character") +); + +opt_parser = OptionParser(option_list=option_list); +opt = parse_args(opt_parser); ############################## #Generate HTML interactive files #### table <- - readr::read_csv("ECO004-CDC007.csv") %>% + readr::read_csv(opt$file) %>% unique() result<-datatable(table, @@ -46,19 +47,23 @@ result<-datatable(table, pageLength = 100, ## number of rows to output for each page scrollX = TRUE, ## enable scrolling on X axis scrollY = TRUE, ## enable scrolling on Y axis - #autoWidth = TRUE, ## use smart column width handling - #width = 20, + autoWidth = TRUE, ## use smart column width handling + #width = 100, + #height=100, server = TRUE, ## use client-side processing only load the 100 on display dom = 'Bfrtip', #bordered = TRUE, buttons = c('csv', 'excel'), ## the user can just download what on display because server=TRUE - columnDefs = list(list(targets = '_all', className = 'dt-center',width = '20px') - )), + columnDefs = list(list(targets = '_all', className = 'dt-center'), + list(targets = c(0, 8, 9), visible = TRUE))), extensions = 'Buttons', selection = 'multiple', ## enable selection of a single row filter = 'top', ## include column filters at the bottom rownames = FALSE ## don't show row numbers/names ) -htmlwidgets::saveWidget(result, "result.html") +# Change the HTML size to fill the browser +result$sizingPolicy$browser$fill<-TRUE +# SAves the html file +htmlwidgets::saveWidget(result, opt$out, selfcontained = FALSE) ############################## From 
3692593dbef7b42bce984be0b303b066ca0ed5fe Mon Sep 17 00:00:00 2001 From: darcy220606 Date: Thu, 27 Oct 2022 17:06:16 +0200 Subject: [PATCH 04/15] Adds final HTML.R --- shinyapp/HTML.R | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/shinyapp/HTML.R b/shinyapp/HTML.R index cdc022a..23ac258 100644 --- a/shinyapp/HTML.R +++ b/shinyapp/HTML.R @@ -7,9 +7,8 @@ # October, 19 2022 ############################## # Authors #### -# Anan Ibrahim -# ananhamido@hotmail.com -# @darcy220606 +# Anan Ibrahim - ananhamido@hotmail.com - @darcy220606 +# Louisa Perelo -louperelo@gmail.com - @louperelo ############################## # Working_directory #### ############################## @@ -23,7 +22,7 @@ if (!require("optparse")) install.packages('optparse') library("dplyr") library("DT") library("optparse") -library(htmlwidgets) +library("htmlwidgets") option_list = list( make_option(c("-f", "--file"), type="character", default="AMPcombi_summary.csv", @@ -50,7 +49,7 @@ result<-datatable(table, autoWidth = TRUE, ## use smart column width handling #width = 100, #height=100, - server = TRUE, ## use client-side processing only load the 100 on display + server = FALSE, ## use client-side processing only load the 100 on display dom = 'Bfrtip', #bordered = TRUE, buttons = c('csv', 'excel'), ## the user can just download what on display because server=TRUE @@ -63,7 +62,7 @@ result<-datatable(table, ) # Change the HTML size to fill the browser -result$sizingPolicy$browser$fill<-TRUE -# SAves the html file +result$sizingPolicy$defaultWidth<-"100%" + htmlwidgets::saveWidget(result, opt$out, selfcontained = FALSE) ############################## From bce5a340c8ad29fa8389778aaeaaf990c33cc4a2 Mon Sep 17 00:00:00 2001 From: darcy220606 Date: Fri, 28 Oct 2022 00:45:51 +0200 Subject: [PATCH 05/15] Add the HTML file to ampcombi --- CHANGES.txt | 4 +- README.md | 3 +- {shinyapp => ampcombi}/HTML.R | 21 +++-- ampcombi/ampcombi.py | 6 +- ampcombi/print_header.py | 0 
ampcombi/version.py | 2 +- ampcombi/visualise_complete_summary.py | 11 +++ setup.py | 6 +- shinyapp/shinyapp.R | 104 ------------------------- 9 files changed, 38 insertions(+), 119 deletions(-) rename {shinyapp => ampcombi}/HTML.R (81%) mode change 100644 => 100755 mode change 100644 => 100755 ampcombi/ampcombi.py mode change 100644 => 100755 ampcombi/print_header.py create mode 100755 ampcombi/visualise_complete_summary.py delete mode 100644 shinyapp/shinyapp.R diff --git a/CHANGES.txt b/CHANGES.txt index e996c17..31c7292 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -9,4 +9,6 @@ v<0.1.4>, <18.10.2022> v<0.1.5>, <27.10.2022> -- Initial release. -- adapt reading of hmmer_hmmsearch output to deal with varying header lines -- fix syntax in "if" statements in "check_input.py" - -- include "check_faa_path" function, to find .faa files also in subdirectories \ No newline at end of file + -- include "check_faa_path" function, to find .faa files also in subdirectories +v<0.1.4>, + -- Included the HTML output for the complete summary \ No newline at end of file diff --git a/README.md b/README.md index 2b87414..e7f7aed 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,7 @@ The path to the folder containing the respective protein fasta files has to be p | --faa_folder | path to the folder containing the samples` .faa files, Filenames have to contain the corresponding sample-name, i.e. 
sample_1.faa | ./test_faa/ | ./faa_files/| | --tooldict | dictionary of AMP-tools and their respective output file endings | '{"ampir":"ampir.tsv", "amplify":"amplify.tsv", "macrel":"macrel.tsv", "hmmer_hmmsearch":"hmmsearch.txt", "ensembleamppred":"ensembleamppred.txt"}' | - | | --amp_database | path to the folder containing the reference database files: (1) a fasta file with <.fasta> file extension and (2) the corresponding table with with functional and taxonomic classifications in <.tsv> file extension | [DRAMP 'general amps'](http://dramp.cpu-bioinfor.org/downloads/) database | ./amp_ref_database/ | -| --complete_summary | Concatenates all samples' summarized tables into one | False | True | +| --complete_summary | Concatenates all samples' summarized tables into one and generates both 'csv' and 'html' files | False | True | | --log | print messages into log file instead of stdout | False | True | | --version | print the version number into stdout | - | 0.1.4 | @@ -136,6 +136,7 @@ The output will be written into your working directory, containing the following | ├── sample_2_ampcombi.csv | └── sample_2_diamond_matches.txt ├── AMPcombi_summary.csv +├── AMPcombi_summary.html └── ampcombi.log ``` diff --git a/shinyapp/HTML.R b/ampcombi/HTML.R old mode 100644 new mode 100755 similarity index 81% rename from shinyapp/HTML.R rename to ampcombi/HTML.R index 23ac258..e7ffb2d --- a/shinyapp/HTML.R +++ b/ampcombi/HTML.R @@ -8,16 +8,17 @@ ############################## # Authors #### # Anan Ibrahim - ananhamido@hotmail.com - @darcy220606 -# Louisa Perelo -louperelo@gmail.com - @louperelo +# Louisa Perelo - louperelo@gmail.com - @louperelo ############################## # Working_directory #### -############################## -setwd("/home/aibrahim/github/testing_ampcombi_on_deepevo") +# setwd("/home/aibrahim/github/testing_ampcombi_on_deepevo") +setwd(getwd()) ############################## # Libraries used + arguments #### if (!require("dplyr")) 
install.packages('dplyr') if (!require("DT")) install.packages('DT') if (!require("optparse")) install.packages('optparse') +if (!require("htmlwidgets")) install.packages('htmlwidgets') library("dplyr") library("DT") @@ -28,16 +29,16 @@ option_list = list( make_option(c("-f", "--file"), type="character", default="AMPcombi_summary.csv", help="AMpcombi complete summary table [default= %default]", metavar="character"), make_option(c("-o", "--out"), type="character", default="AMPcombi_summary.html", - help="Provide the name of the output file [default= %default]", metavar="character") -); - + help="Provide the name of the output file [default= %default]", metavar="character")); +# Turns warnings off +#options(warn=-1) opt_parser = OptionParser(option_list=option_list); opt = parse_args(opt_parser); ############################## #Generate HTML interactive files #### table <- - readr::read_csv(opt$file) %>% + readr::read_csv(opt$file,show_col_types = FALSE) %>% unique() result<-datatable(table, @@ -51,10 +52,11 @@ result<-datatable(table, #height=100, server = FALSE, ## use client-side processing only load the 100 on display dom = 'Bfrtip', + language = list(sSearch = "Keyword look-up:"), #bordered = TRUE, buttons = c('csv', 'excel'), ## the user can just download what on display because server=TRUE columnDefs = list(list(targets = '_all', className = 'dt-center'), - list(targets = c(0, 8, 9), visible = TRUE))), + list(targets='aa_sequence', visible=TRUE, width='20'))), extensions = 'Buttons', selection = 'multiple', ## enable selection of a single row filter = 'top', ## include column filters at the bottom @@ -65,4 +67,7 @@ result<-datatable(table, result$sizingPolicy$defaultWidth<-"100%" htmlwidgets::saveWidget(result, opt$out, selfcontained = FALSE) + +# CLean up the library folder created +unlink("AMPcombi_summary_files", recursive = TRUE) ############################## diff --git a/ampcombi/ampcombi.py b/ampcombi/ampcombi.py old mode 100644 new mode 100755 index 
995e706..26d3d87 --- a/ampcombi/ampcombi.py +++ b/ampcombi/ampcombi.py @@ -13,6 +13,7 @@ from check_input import * from amp_database import * from print_header import * +from visualise_complete_summary import * # Define input arguments: parser = argparse.ArgumentParser(prog = 'ampcombi', formatter_class=argparse.RawDescriptionHelpFormatter, @@ -41,7 +42,7 @@ type=str, default='{"ampir":"ampir.tsv", "amplify":"amplify.tsv", "macrel":"macrel.tsv", "neubi":"neubi.fasta", "hmmer_hmmsearch":"hmmsearch.txt", "ensembleamppred":"ensembleamppred.txt"}') parser.add_argument("--amp_database", dest="ref_db", nargs='?', help="Enter the path to the folder containing the reference database files (.fa and .tsv); a fasta file and the corresponding table with functional and taxonomic classifications. \n (default: DRAMP database)", type=str, default=None) -parser.add_argument("--complete_summary", dest="complete", nargs='?', help="Concatenates all sample summaries to one final summary", +parser.add_argument("--complete_summary", dest="complete", nargs='?', help="Concatenates all sample summaries to one final summary and outputs both csv and interactive html files", type=bool, default=False) parser.add_argument("--log", dest="log_file", nargs='?', help="Silences the standard output and captures it in a log file)", type=bool, default=False) @@ -121,10 +122,11 @@ def main_workflow(): # concatenate the sample summary to the complete summary and overwrite it complete_summary_df = pd.concat([complete_summary_df, sample_summary_df]) complete_summary_df.to_csv('AMPcombi_summary.csv', sep=',', index=False) + html_generator() else: continue if (complete_summary): - print(f'\n FINISHED: The AMPcombi_summary.csv file was saved to your current working directory.') + print(f'\n FINISHED: The AMPcombi_summary.csv and AMPcombi_summary.html file was saved to your current working directory.') else: print(f'\n FINISHED: AMPcombi created summaries for all input samples.') diff --git 
a/ampcombi/print_header.py b/ampcombi/print_header.py old mode 100644 new mode 100755 diff --git a/ampcombi/version.py b/ampcombi/version.py index e2888cc..63eb0cb 100644 --- a/ampcombi/version.py +++ b/ampcombi/version.py @@ -1 +1 @@ -__version__ = '0.1.5' \ No newline at end of file +__version__ = '0.1.6' \ No newline at end of file diff --git a/ampcombi/visualise_complete_summary.py b/ampcombi/visualise_complete_summary.py new file mode 100755 index 0000000..88625f8 --- /dev/null +++ b/ampcombi/visualise_complete_summary.py @@ -0,0 +1,11 @@ +#!/bin/python3 + +# TITLE: Visualise teh complete summary and save it to a HTML file + +import subprocess + +######################################## +# FUNCTION: DOWNLOAD DRAMP DATABASE AND CLEAN IT +######################################### +def html_generator(): + subprocess.run('HTML.R', text=True) \ No newline at end of file diff --git a/setup.py b/setup.py index 7da8db5..e1b51a7 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='AMPcombi', - version='0.1.5', + version='0.1.6', author='Anan Ibrahim, Louisa Perelo', author_email='ananhamido@hotmail.com, louperelo@gmail.com', packages=['ampcombi'], @@ -17,7 +17,9 @@ 'ampcombi/diamond_makedb.sh', 'ampcombi/reformat_tables.py', 'ampcombi/print_header.py', - 'ampcombi/version.py'], + 'ampcombi/version.py', + 'ampcombi/visualise_complete_summary.py', + 'ampcombi/HTML.R'], url='http://pypi.python.org/pypi/AMPcombi/', license='LICENSE.txt', description='A parsing tool for AMP tools.', diff --git a/shinyapp/shinyapp.R b/shinyapp/shinyapp.R deleted file mode 100644 index 3c42865..0000000 --- a/shinyapp/shinyapp.R +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env Rscript - -#Rscript shinyapp/shinyapp_html_file.R -f ../testing_ampcombi_on_deepevo/ECO004-CDC007.csv - -############################## -# Shinyapp to visualise the complete summary tables generated by AMPcombi #### -############################## -# Date #### -# October, 19 2022 
-############################## -# Authors #### -# Anan Ibrahim -# ananhamido@hotmail.com -# @darcy220606 -############################## -# Working_directory #### -############################## -#setwd("/home/aibrahim/github/testing_ampcombi_on_deepevo") -############################## -# Libraries used + arguments #### -library(dplyr) -library(DT) -library(shiny) -library(data.table) -library(ggplot2) -library(optparse) - -option_list = list( - make_option(c("-f", "--file"), type="character", default=NULL, - help="AMpcombi complete summary table", metavar="character"), - make_option(c("-o", "--out"), type="character", default="AMPcombi_summary.html", - help="Provide the name of the output file [default= %default]", metavar="character") -); - -opt_parser = OptionParser(option_list=option_list); -opt = parse_args(opt_parser); - -############################## -# Generate the html file #### -############################## -table <- - readr::read_csv(opt$file) %>% - unique() - -about_page <- tabPanel( - title = strong('About'), - br(), - includeMarkdown("https://raw.githubusercontent.com/Darcy220606/AMPcombi/dev/README.md") -) -## page 2: Summary table -summary_page <- tabPanel(title = strong('Summary table'), - mainPanel(DTOutput('tbl'), - width = 20)) -## page 3: Plots and figures -plots_page <- tabPanel(title = strong('Plots'), - titlePanel("Analysis"), - sidebarLayout( - sidebarPanel( - ), - mainPanel( - tabsetPanel( - tabPanel( - title = "Plot" - ), - tabPanel( - title = "Statistics", - ) - ) - ) - )) - - -## Shiny app -ui <- navbarPage( - title = strong("AMPcombi"), - about_page, - summary_page, - plots_page, - tags$style(type = 'text/css', '.navbar { background-color: #a2d2ff; - font-family: Arial; - font-size: 15px; - color: #023047; }') -) -server <- function(input, output) -{output$tbl = renderDT(table, - class = 'cell-border stripe', ## add column border - options = list( paging = TRUE, ## paginate the output - pageLength = 100, ## number of rows to 
output for each page - scrollX = TRUE, ## enable scrolling on X axis - scrollY = TRUE, ## enable scrolling on Y axis - autoWidth = TRUE, ## use smart column width handling - #width = 200, - server = TRUE, ## use client-side processing only load the 100 on display - dom = 'Bfrtip', - #bordered = TRUE, - buttons = c('csv', 'excel'), ## the user can just download what on display because server=TRUE - columnDefs = list(list(targets = '_all', className = 'dt-center'), - list(targets = c(0, 8, 9), visible = TRUE))), - extensions = 'Buttons', - selection = 'multiple', ## enable selection of a single row - filter = 'top', ## include column filters at the bottom - rownames = FALSE ## don't show row numbers/names -)} -shinyApp(ui = ui, server = server) From b3ff30add8d5532d5d0ea6543ddfb7f4809f09c7 Mon Sep 17 00:00:00 2001 From: Anan Ibrahim <81744003+Darcy220606@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:28:56 +0200 Subject: [PATCH 06/15] Update README.md Co-authored-by: louperelo <44900284+louperelo@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e7f7aed..c862d0f 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,7 @@ The path to the folder containing the respective protein fasta files has to be p | --faa_folder | path to the folder containing the samples` .faa files, Filenames have to contain the corresponding sample-name, i.e. 
sample_1.faa | ./test_faa/ | ./faa_files/| | --tooldict | dictionary of AMP-tools and their respective output file endings | '{"ampir":"ampir.tsv", "amplify":"amplify.tsv", "macrel":"macrel.tsv", "hmmer_hmmsearch":"hmmsearch.txt", "ensembleamppred":"ensembleamppred.txt"}' | - | | --amp_database | path to the folder containing the reference database files: (1) a fasta file with <.fasta> file extension and (2) the corresponding table with with functional and taxonomic classifications in <.tsv> file extension | [DRAMP 'general amps'](http://dramp.cpu-bioinfor.org/downloads/) database | ./amp_ref_database/ | -| --complete_summary | Concatenates all samples' summarized tables into one and generates both 'csv' and 'html' files | False | True | +| --complete_summary | Concatenates all samples' summarized tables into one and generates both 'csv' and interactive 'html' files | False | True | | --log | print messages into log file instead of stdout | False | True | | --version | print the version number into stdout | - | 0.1.4 | From 39f2db81bb8ba511e950bf06161a7a7026c73704 Mon Sep 17 00:00:00 2001 From: Anan Ibrahim <81744003+Darcy220606@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:30:10 +0200 Subject: [PATCH 07/15] Update CHANGES.txt Co-authored-by: louperelo <44900284+louperelo@users.noreply.github.com> --- CHANGES.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 31c7292..2bd9054 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -10,5 +10,5 @@ v<0.1.5>, <27.10.2022> -- Initial release. 
-- adapt reading of hmmer_hmmsearch output to deal with varying header lines -- fix syntax in "if" statements in "check_input.py" -- include "check_faa_path" function, to find .faa files also in subdirectories -v<0.1.4>, +v<0.1.6>, <28.10.2022> -- Included the HTML output for the complete summary \ No newline at end of file From 0952ad6fcdf70b7b0562df81a900834c2a337e1a Mon Sep 17 00:00:00 2001 From: Anan Ibrahim <81744003+Darcy220606@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:32:07 +0200 Subject: [PATCH 08/15] Update ampcombi/ampcombi.py Co-authored-by: louperelo <44900284+louperelo@users.noreply.github.com> --- ampcombi/ampcombi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ampcombi/ampcombi.py b/ampcombi/ampcombi.py index 26d3d87..adc7903 100755 --- a/ampcombi/ampcombi.py +++ b/ampcombi/ampcombi.py @@ -126,7 +126,7 @@ def main_workflow(): else: continue if (complete_summary): - print(f'\n FINISHED: The AMPcombi_summary.csv and AMPcombi_summary.html file was saved to your current working directory.') + print(f'\n FINISHED: The AMPcombi_summary.csv and AMPcombi_summary.html files were saved to your current working directory.') else: print(f'\n FINISHED: AMPcombi created summaries for all input samples.') From bb85ca494172fc326c9f3ca4579fe8e6ed82d437 Mon Sep 17 00:00:00 2001 From: Anan Ibrahim <81744003+Darcy220606@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:33:38 +0200 Subject: [PATCH 09/15] Update ampcombi/visualise_complete_summary.py Co-authored-by: louperelo <44900284+louperelo@users.noreply.github.com> --- ampcombi/visualise_complete_summary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ampcombi/visualise_complete_summary.py b/ampcombi/visualise_complete_summary.py index 88625f8..7b4fe8e 100755 --- a/ampcombi/visualise_complete_summary.py +++ b/ampcombi/visualise_complete_summary.py @@ -5,7 +5,7 @@ import subprocess ######################################## -# FUNCTION: DOWNLOAD DRAMP DATABASE AND 
CLEAN IT +# FUNCTION: GENERATE AN INTERACTIVE HTML SUMMARY ######################################### def html_generator(): subprocess.run('HTML.R', text=True) \ No newline at end of file From 83c1b6da2d49ac84bf540bc379c2cf5d123e7463 Mon Sep 17 00:00:00 2001 From: Anan Ibrahim <81744003+Darcy220606@users.noreply.github.com> Date: Fri, 28 Oct 2022 10:34:06 +0200 Subject: [PATCH 10/15] Update HTML.R remove the commented out setwd --- ampcombi/HTML.R | 1 - 1 file changed, 1 deletion(-) diff --git a/ampcombi/HTML.R b/ampcombi/HTML.R index e7ffb2d..81627cd 100755 --- a/ampcombi/HTML.R +++ b/ampcombi/HTML.R @@ -11,7 +11,6 @@ # Louisa Perelo - louperelo@gmail.com - @louperelo ############################## # Working_directory #### -# setwd("/home/aibrahim/github/testing_ampcombi_on_deepevo") setwd(getwd()) ############################## # Libraries used + arguments #### From 925447201e7553ceafe197220057233aa701595a Mon Sep 17 00:00:00 2001 From: darcy220606 Date: Tue, 1 Nov 2022 15:42:17 +0100 Subject: [PATCH 11/15] Check if db was already downloaded in cwd --- ampcombi/check_input.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/ampcombi/check_input.py b/ampcombi/check_input.py index 4ffdd61..adef0f8 100755 --- a/ampcombi/check_input.py +++ b/ampcombi/check_input.py @@ -39,20 +39,25 @@ def check_faa_path(faa_path, samplename): return path_list[0] def check_ref_database(database): - if(database==None): + if((database==None) and (not os.path.exists('amp_ref_database'))): print('<--AMP_database> was not given, the current DRAMP general-AMP database will be downloaded and used') database = 'amp_ref_database' os.makedirs(database, exist_ok=True) db = database download_DRAMP(db) - return db - else: - if os.path.exists(database): + return db + elif ((not database==None)): + if (os.path.exists(database)): db = database + print(f'<--AMP_database> = ${db} is found and will be used') return db - else: - if not os.path.exists(database): - 
sys.exit(f'Reference amp database path {database} does not exist, please check the path.') + if (not os.path.exists(database)): + sys.exit(f'Reference amp database path {database} does not exist, please check the path.') + elif((database==None) and (os.path.exists('amp_ref_database'))): + print('<--AMP_database> = DRAMP is already downloaded and will be reused') + database = 'amp_ref_database' + db = database + return db def check_path(path): return os.path.exists(path) #returns True or False From a75bc00c32ecf873a133b18cf01f172ad9cf0d2d Mon Sep 17 00:00:00 2001 From: darcy220606 Date: Tue, 1 Nov 2022 16:07:42 +0100 Subject: [PATCH 12/15] Add the diamond threads as argument --- ampcombi/amp_database.py | 8 ++++---- ampcombi/ampcombi.py | 9 ++++++--- ampcombi/diamond_alignment.sh | 7 +++++-- ampcombi/diamond_makedb.sh | 5 ++++- 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/ampcombi/amp_database.py b/ampcombi/amp_database.py index ae3bbb9..7e54a42 100755 --- a/ampcombi/amp_database.py +++ b/ampcombi/amp_database.py @@ -44,14 +44,14 @@ def download_DRAMP(db): ######################################## # FUNCTION: CREATE DIAMOND COMPATIBLE DATBASE FORMATS ######################################### -def create_diamond_ref_db(db): +def create_diamond_ref_db(db,threads): cwd = os.getcwd() for file in os.listdir(db): if file.endswith('.fasta'): path = os.path.join(os.path.abspath(db) + '/' + file) os.chdir(db) #process = subprocess.Popen([f'{scripts_path}/diamond_makedb.sh', path]) - subprocess.run('diamond_makedb.sh', text=True, input=path) + subprocess.run('diamond_makedb.sh', text=True, input=f'{path}\n{threads}') os.chdir(cwd) print return path @@ -59,13 +59,13 @@ def create_diamond_ref_db(db): ######################################## # FUNCTION: DIAMOND ALIGNMENT ######################################### -def diamond_alignment(db, amp_faa_paths, amp_matches): +def diamond_alignment(db, amp_faa_paths, amp_matches,threads): #create temp folder and delete 
at the end cwd = os.getcwd() for path in amp_faa_paths: # align the query with the database temp = tempfile.mkdtemp() - subprocess.run('diamond_alignment.sh', text=True, input=f'{path}\n{temp}\n{db}') + subprocess.run('diamond_alignment.sh', text=True, input=f'{path}\n{temp}\n{db}\n{threads}') shutil.move(temp+'/diamond_matches.tsv', amp_matches) shutil.rmtree(temp) # mege the diamond_alignment with the ref_db table diff --git a/ampcombi/ampcombi.py b/ampcombi/ampcombi.py index adc7903..cd4a545 100755 --- a/ampcombi/ampcombi.py +++ b/ampcombi/ampcombi.py @@ -46,6 +46,8 @@ type=bool, default=False) parser.add_argument("--log", dest="log_file", nargs='?', help="Silences the standard output and captures it in a log file)", type=bool, default=False) +parser.add_argument("--threads", dest="cores", nargs='?', help="Changes the threads used for DIAMOND alignment (default: %(default)s)", + type=bool, default='4') parser.add_argument('--version', action='version', version='%(prog)s ' + __version__) # get command line arguments @@ -60,6 +62,7 @@ tooldict = json.loads(args.tools) database = args.ref_db complete_summary = args.complete +threads = args.cores # additional variables # extract list of tools from input dictionary. 
If not given, default dict contains all possible tools @@ -90,7 +93,7 @@ def main_workflow(): # generate summary for each sample amp_faa_paths = [] - create_diamond_ref_db(db) + create_diamond_ref_db(db,threads) for i in range(0, len(samplelist)): main_list = [] print('\n ########################################################## ') @@ -108,8 +111,8 @@ def main_workflow(): amp_faa_paths.append(out_path) print(f'The fasta containing AMP sequences for {samplelist[i]} was saved to {samplelist[i]}/ \n') amp_matches = samplelist[i] +'/'+samplelist[i]+'_diamond_matches.txt' - print(f'The diamond alignment for {samplelist[i]} in process....') - diamond_df = diamond_alignment(db, amp_faa_paths, amp_matches) + print(f'The diamond alignment for {samplelist[i]} in progress ....') + diamond_df = diamond_alignment(db, amp_faa_paths, amp_matches, threads) print(f'The diamond alignment for {samplelist[i]} was saved to {samplelist[i]}/.') # Merge summary_df and diamond_df sample_summary_df = pd.merge(summary_df, diamond_df, on = 'contig_id', how='left') diff --git a/ampcombi/diamond_alignment.sh b/ampcombi/diamond_alignment.sh index 2a6ab9a..e0bd5b4 100755 --- a/ampcombi/diamond_alignment.sh +++ b/ampcombi/diamond_alignment.sh @@ -7,18 +7,21 @@ INPUT_FASTA=$1 OUTPUT_DIR=$2 REF_DIR=$3 +THREADS=$4 read INPUT_FASTA read OUTPUT_DIR read REF_DIR +read THREADS -# Adjust path according to the input folder with the ist of fasta files +# Adjust path according to the input folder with the list of fasta files IN=$INPUT_FASTA OUT=$OUTPUT_DIR REF_DB=$REF_DIR +P=$THREADS diamond blastp \ --p 28 -d $REF_DB/amp_ref -q $IN --quiet \ +-p $P -d $REF_DB/amp_ref -q $IN --quiet \ --outfmt 6 qseqid sseqid pident evalue nident full_qseq full_sseq qseq sseq qcovhsp scovhsp --max-target-seqs 1 --ultra-sensitive -e10000 --id2 1 -s1 -c1 --masking 0 --gapped-filter-evalue 0 --algo 0 --min-score 0 --shape-mask 1111 \ -o $OUT/diamond_matches.txt diff --git a/ampcombi/diamond_makedb.sh 
b/ampcombi/diamond_makedb.sh index ed76ca8..1d7c142 100755 --- a/ampcombi/diamond_makedb.sh +++ b/ampcombi/diamond_makedb.sh @@ -5,13 +5,16 @@ ######################################### INPUT_FASTA=$1 +THREADS=$2 #OUTPUT_DIR=$2 read INPUT_FASTA +read THREADS #read OUTPUT_DIR IN=$INPUT_FASTA +P=$THREADS #OUT=$OUTPUT_DIR #cd $OUT -diamond makedb --in $IN -p 28 -d amp_ref --quiet \ No newline at end of file +diamond makedb --in $IN -p $P -d amp_ref --quiet \ No newline at end of file From 6381c6fe3f28d1612ce1e0fc4f119ac4f0c10e29 Mon Sep 17 00:00:00 2001 From: darcy220606 Date: Tue, 1 Nov 2022 16:13:44 +0100 Subject: [PATCH 13/15] Update readme to include DIAMOND threads --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c862d0f..bf29075 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,7 @@ ampcombi \ Here the head folder containing output files has to be given. AMPcombi finds and summarizes the output files from different tools, if the folder is structured and named as: `/result_folder/toolsubdir/samplesubdir/sample.tool.filetype`. - Note that the filetype ending might vary and can be specified with `--tooldict`, if it is different from the default. When passing a dictionary via command line, this has to be done as a string with single quotes `' '` and the dictionary keys and items with double quotes `" "`. i.e. `'{"key1":"item1", "key2":"item2"}'` +- Note that `--sample_list` can also be given if only specific samples are needed from the directory. The path to the folder containing the respective protein fasta files has to be provided with `--faa_folder`. The files have to be named with `.faa`. @@ -112,8 +113,9 @@ The path to the folder containing the respective protein fasta files has to be p | --faa_folder | path to the folder containing the samples` .faa files, Filenames have to contain the corresponding sample-name, i.e. 
sample_1.faa | ./test_faa/ | ./faa_files/| | --tooldict | dictionary of AMP-tools and their respective output file endings | '{"ampir":"ampir.tsv", "amplify":"amplify.tsv", "macrel":"macrel.tsv", "hmmer_hmmsearch":"hmmsearch.txt", "ensembleamppred":"ensembleamppred.txt"}' | - | | --amp_database | path to the folder containing the reference database files: (1) a fasta file with <.fasta> file extension and (2) the corresponding table with with functional and taxonomic classifications in <.tsv> file extension | [DRAMP 'general amps'](http://dramp.cpu-bioinfor.org/downloads/) database | ./amp_ref_database/ | -| --complete_summary | Concatenates all samples' summarized tables into one and generates both 'csv' and interactive 'html' files | False | True | +| --complete_summary | concatenates all samples' summarized tables into one and generates both 'csv' and interactive 'html' files | False | True | | --log | print messages into log file instead of stdout | False | True | +| --threads | adjust the number of threads required for DIAMOND alignment depending on the computing resources available | 4 | 32 | | --version | print the version number into stdout | - | 0.1.4 | - Note: The fasta file corresponding to the AMP database should not contain any characters other than ['A','C','D','E','F','G','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y'] From 7845bfa7ede0b7a9f846845f0de37f6948f25b0e Mon Sep 17 00:00:00 2001 From: Louisa Perelo Date: Wed, 2 Nov 2022 10:31:34 +0100 Subject: [PATCH 14/15] fix: create_samplelist before check_input --- ampcombi/ampcombi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ampcombi/ampcombi.py b/ampcombi/ampcombi.py index cd4a545..0288f78 100755 --- a/ampcombi/ampcombi.py +++ b/ampcombi/ampcombi.py @@ -79,10 +79,10 @@ def main_workflow(): # print AMPcombi header print_header() - # check input parameters - check_input_complete(path, samplelist_in, filepaths_in, tools) # check input sample-list and create 
sample-list if input empty samplelist = check_samplelist(samplelist_in, tools, path) + # check input parameters + check_input_complete(path, samplelist, filepaths_in, tools) # check input filepaths and create list of list of filepaths per sample if input empty filepaths = check_pathlist(filepaths_in, samplelist, fileending, path) # check amp_ref_database filepaths and create a directory if input empty From e6de68a724b05b5a5b123a8d031ab40f3ce55485 Mon Sep 17 00:00:00 2001 From: Louisa Perelo Date: Wed, 2 Nov 2022 10:35:54 +0100 Subject: [PATCH 15/15] update CHANGES.md --- CHANGES.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 2bd9054..88c5915 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -10,5 +10,7 @@ v<0.1.5>, <27.10.2022> -- Initial release. -- adapt reading of hmmer_hmmsearch output to deal with varying header lines -- fix syntax in "if" statements in "check_input.py" -- include "check_faa_path" function, to find .faa files also in subdirectories -v<0.1.6>, <28.10.2022> - -- Included the HTML output for the complete summary \ No newline at end of file +v<0.1.6>, <02.11.2022> + -- Included the HTML output for the complete summary + -- add option --threads for diamond (make database and alignment) + -- included check if database was downloaded once to not download again \ No newline at end of file