Skip to content

Commit

Permalink
pharokka annotation working phage map building not yet
Browse files Browse the repository at this point in the history
  • Loading branch information
mult1fractal committed Jul 26, 2024
1 parent 6bf724e commit 542292f
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 7 deletions.
Empty file added bin/pharokka_plotter_parser.sh
Empty file.
1 change: 1 addition & 0 deletions configs/container.config
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ process {
withLabel: virnet { container = 'multifractal/virnet-hack:0.1' }
withLabel: virsorter { container = 'multifractal/virsorter:0.1.2' }
withLabel: phigaro { container = 'multifractal/phigaro:0.5.2' }
withLabel: pharokka { container = 'multifractal/pharokka:v1.7_seqkit' }
withLabel: virsorter2 { container = 'papanikos/virsorter-2:2.2.1--fa935f8' }
withLabel: seeker { container = 'multifractal/seeker:0.1' }
withLabel: rmarkdown { container = 'nanozoo/rmarkdown:2.10--a3f4088' }
Expand Down
4 changes: 2 additions & 2 deletions phage.nf
Original file line number Diff line number Diff line change
Expand Up @@ -231,9 +231,9 @@ workflow {
**************************/
// run annotation if annotate flag or no flag at all
if ( params.fasta && params.annotate && !params.identify && !params.setup || params.fasta && !params.identify && !params.annotate && !params.setup ) {
// actual tools
phage_annotation_wf(annotation_channel)
// actual tools
checkV_wf(annotation_channel)
phage_annotation_wf(annotation_channel, checkV_wf.out)
phage_tax_classification_wf(annotation_channel)
// markdown report input
phage_annotation_wf.out
Expand Down
12 changes: 12 additions & 0 deletions workflows/phage_annotation_wf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@ include { prodigal } from './process/phage_annotation/prodigal'
include { hmmscan } from './process/phage_annotation/hmmscan'
include { chromomap_parser } from './process/phage_annotation/chromomap_parser'
include { chromomap } from './process/phage_annotation/chromomap'
include { pharokka } from './process/phage_annotation/pharokka'
include { pharokka_plotter } from './process/phage_annotation/pharokka'

workflow phage_annotation_wf {
take: fasta_and_tool_results
checkv
main:
// Input for custom annotation database
if (params.annotation_db) { annotation_custom_db_ch = Channel
Expand All @@ -28,6 +31,15 @@ workflow phage_annotation_wf {

annotationtable_markdown_input = chromomap_parser.out.annotationfile_combined_ch

//annotation via pharokka
pharokka(fasta)

fasta.view()
pharokka.out.pharokka_folder_ch.view()
checkv.view()

pharokka_plotter(fasta,pharokka.out.pharokka_folder_ch,checkv)

emit: annotationtable_markdown_input

}
Expand Down
106 changes: 106 additions & 0 deletions workflows/process/phage_annotation/pharokka.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// Annotate a (multi-)fasta with pharokka.
// Input : tuple of sample name and fasta file.
// Output: the pharokka result directory "<name>_pharokka_out" (optional, so a
//         missing directory does not fail the channel).
process pharokka {
    label 'pharokka'
    publishDir "${params.output}/${name}/pharokka", mode: 'copy'

    input:
        tuple val(name), path(fasta)

    output:
        path("*_pharokka_out"), emit: pharokka_folder_ch optional true

    script:
        // result directory and file prefix are both derived from the sample name
        def result_dir = "${name}_pharokka_out"
        def prefix     = name
        """
        pharokka.py -i ${fasta} -o ${result_dir} -t 10 -d /pharokka_v1.4.0_databases -f -p ${prefix}
        """

    stub:
        """
        mkdir stub_pharokka_out
        """
}


// Draw genome maps from an existing pharokka annotation.
// Inputs :
//   - tuple of sample name and the fasta that was annotated
//   - the pharokka output directory for that fasta
//   - tuple of a (unused) name and the checkV result table used to pick contigs
// Output : tuple of sample name and the directory "pharokka_plots" (optional).
// Steps  : 1) pick contigs whose column 9 in the checkV table is > 75,
//          2) write each picked contig into its own fasta,
//          3) run pharokka_multiplotter.py once on the sample's GenBank file,
//          4) run pharokka_plotter.py per picked contig and collect the maps.
process pharokka_plotter {
    publishDir "${params.output}/${name}/pharokka", mode: 'copy'
    label 'pharokka'
    input:
        tuple val(name), path(fasta)
        path(pharokka_annotation_out)
        tuple val(sec_name), path(checkv_results)
    output:
        tuple val(name), path("pharokka_plots"), emit: annotation_map_ch optional true
    script:
        """
        ## select contigs to plot from the checkV table
        ## LC_ALL=C allow awk to use float numbers
        ## NR>1 skips the header; \$9+0 forces a numeric comparison so that
        ## non-numeric values (e.g. NA) are treated as 0 instead of passing a string compare
        ## NOTE(review): column 9 is assumed to hold the completeness value - confirm for the checkV version in use
        LC_ALL=C awk 'NR>1 && (\$9+0)>75 {print \$1}' < ${checkv_results} > tmp_contigs_to_plot.tsv

        ## extract contigs based on list
        mkdir -p fastas_to_plot/sub
        awk -F'>' 'NR==FNR{ids[\$0]; next} NF>1{f=(\$2 in ids)} f' tmp_contigs_to_plot.tsv ${fasta} > fastas_to_plot/sub/tmp_contigs_to_plot.fasta

        ## split fasta into one file per contig (file name = contig id up to the first "|")
        cd fastas_to_plot/ && awk -F "|" '/^>/ {close(F); ID=\$1; gsub("^>", "", ID); F=ID".fasta"} {print >> F}' sub/tmp_contigs_to_plot.fasta && cd ..

        ## plot all contigs of the annotation once; pharokka was run with prefix ${name},
        ## so the GenBank file is expected at <pharokka_out>/${name}.gbk
        pharokka_multiplotter.py -g ${pharokka_annotation_out}/${name}.gbk -o pharokka_plots
        mkdir -p pharokka_plots

        for i in fastas_to_plot/*.fasta; do
            ## nothing selected -> the glob stays literal, skip it
            [ -e "\$i" ] || continue
            ## keep the full contig id (ids may contain dots)
            contig_name=\$(basename "\$i" .fasta)
            pharokka_plotter.py -i "\$i" -n "\${contig_name}_annotation_map" -o ${pharokka_annotation_out} --interval 8000 --annotations 0.5 --plot_title "\$contig_name" -p ${name} -f
            ## collect the maps inside the declared output directory so they get published
            mv ${pharokka_annotation_out}/*_annotation_map.png pharokka_plots/
            mv ${pharokka_annotation_out}/*_annotation_map.svg pharokka_plots/
        done
        """
    stub:
        """
        mkdir -p pharokka_plots
        touch pharokka_plots/stub.svg
        touch pharokka_plots/stub.png
        """
}

// pharokka_plotter.py -i all_pos_phage_filtered.fa -n all_pos_phage_annotation_map -o all_pos_phage_pharokka_out --interval 8000 --annotations 0.5 --plot_title 'test_Phage' -p all_pos_phage
// split fasta and keep contigname as a file name:awk 'BEGIN{RS=">";FS="\n"} NR>1{fnme=$1".fasta"; print ">" $0 > fnme; close(fnme);}' example.fasta

/*
Example GenBank (.gbk) record kept for reference only; this is not executable code.

LOCUS pos_phage_9 221908 bp DNA linear PHG 25-JUL-2024
DEFINITION pos_phage_9.
ACCESSION pos_phage_9
VERSION pos_phage_9
KEYWORDS .
SOURCE .
ORGANISM .
.
FEATURES Location/Qualifiers
CDS 1..753
/ID="EPQHBJFJ_CDS_0949"
/transl_table=11
/phrog="246"
/top_hit="p216000 VI_03943"
/locus_tag="EPQHBJFJ_CDS_0949"
/function="other"
/product="NinI-like serine-threonine phosphatase"
/source="PHANOTATE_1.5.1"
/score="-139481.4743619502"
/phase="0"
/translation="MNKRLLVISDIHGEYDMFVRLLDKVKYNPQTCQLMLLGDFVDKGP
KSREVIELVMKLVAGGAKASLGNHELSFLRWLQGDRSRFHSSTSSTFRSYVYTSGNARR
KYTRFELDEGRRYILKRYKHHVNFLKSLPYYFEDDEHVYVHAGFDSSSDDWRKNTKTDD
FVWIRERFYNNPTNEEKITVFGHTKCRRLHDSDDPWFDGDKIGIDGGASEQGQLNCLEI
LGNKEYRIHKVFARNVRGGNQTYGKQAM"
ORIGIN
1 atgaataagc gattattagt aatatcggac atacacggag agtacgatat gttcgttagg
61 ttattagata aagtaaagta taatccccaa acttgccaac taatgttact cggtgacttc
121 gtggataaag gtccaaagtc acgtgaggta atcgagcttg tgatgaagtt agtcgctggt
181 ggtgcaaaag cgtcacttgg taatcacgag ttgtcgttct tacgttggtt gcaaggcgat
241 cgatcccgat tccattccag tacatcttca acctttagga gttatgttta cacttcaggt
301 aacgctagac gtaagtacac tcggtttgaa ttagatgagg gtagacgtta tatacttaag
361 cgttataagc atcatgtcaa tttcttaaag tcattgcctt attactttga agacgatgag
421 catgtatacg ttcatgctgg attcgattca tcatccgacg actggagaaa gaacacgaag
481 acggatgact tcgtatggat tagagaacgt ttttataaca atccaactaa tgaagaaaag
//
*/
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,8 @@ process sourmash_for_tax {
tuple val(name), path("${name}_tax-class.tsv"), emit: tax_class_ch optional true
shell:
"""
<<<<<<< HEAD
###//set -euxo pipefail
=======
set -euxo pipefail
### set -euxo pipefail
>>>>>>> 96d12cd78813b43bed5e5d4a53e7cad86287962b
for fastafile in ${fasta_dir}/*.fa; do
sourmash sketch dna -p k=21,scaled=100 \${fastafile}
done
Expand Down

0 comments on commit 542292f

Please sign in to comment.