Skip to content

Commit

Permalink
updated phage plotting
Browse files Browse the repository at this point in the history
  • Loading branch information
mult1fractal committed Jul 31, 2024
1 parent ac375fb commit d9d70f1
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 10 deletions.
83 changes: 83 additions & 0 deletions bin/phage_plot_preliminary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#!/usr/bin/env python3
# how to use root@9a899e99ea29:/input# python plasmidplot.py --gff3 contig_255_barcode01_bakta.gff3 --name contig_225_barcode01_plasmidplot --contig contig_225 --ring1 salmon --ring2 skyblue --limit 30
# docker run --rm -it -v $PWD:/input nanozoo/pycirclize:3.12--48de646 /bin/bash
# remove hypothetical Proteins: grep "ID=" contig_9_barcode2_bakta.gff3 | grep -v "Name=hypothetical protein" >without_hypothetical_protein.gff3


from pycirclize import Circos
from pycirclize.utils import fetch_genbank_by_accid
from pycirclize.parser import Genbank
from pycirclize.parser import Gff
import argparse

################################################################################
## Initialization

parser = argparse.ArgumentParser(description = 'Create json-file for upload to MongoDB from different result-files.')

#define arguments
parser.add_argument('-g', '--gff3', help = "Input gff3-files", default = 'false')
parser.add_argument('-n', '--name', help = "Plotname", default = 'false')
parser.add_argument('-c', '--contig', help = "Contig to plot", default = 'false')
parser.add_argument('-1', '--ring1', help = "Ring color 1", default = 'salmon')
parser.add_argument('-2', '--ring2', help = "Ring color 1", default = 'skyblue')
parser.add_argument('-l', '--limit', help = "character limit for text, after ... is added", default = 30)

#parsing:
arg = parser.parse_args()

################################################################################
## INPUT

gff_file = arg.gff3
plotname = arg.name
contigname = arg.contig
ringnr1 = arg.ring1
ringnr2 = arg.ring2
charlimit = arg.limit

# read file
gff = Gff(gff_file=gff_file, name=plotname + "\n" + contigname,target_seqid=contigname)

# Initialize Circos instance with genome size
circos = Circos(sectors={gff.name: gff.range_size})
circos.text(f"{gff.name}\n\n{gff.range_size}bp", size=14)
circos.rect(r_lim=(90, 100), fc="lightgrey", ec="none", alpha=0.5)
sector = circos.sectors[0]

# Plot forward strand CDS
f_cds_track = sector.add_track((95, 100))
f_cds_feats = gff.extract_features("CDS", target_strand=1)
f_cds_track.genomic_features(f_cds_feats, plotstyle="arrow", fc=ringnr1, lw=0.5)

# Plot reverse strand CDS
r_cds_track = sector.add_track((90, 95))
r_cds_feats = gff.extract_features("CDS", target_strand=-1)
r_cds_track.genomic_features(r_cds_feats, plotstyle="arrow", fc=ringnr2, lw=0.5)

# Plot 'gene' qualifier label if exists
labels, label_pos_list = [], []
for feat in gff.extract_features("CDS"):
start = int(str(feat.location.start))
end = int(str(feat.location.end))
label_pos = (start + end) / 2
gene_name = feat.qualifiers.get("gene", [None])[0]
name = feat.qualifiers.get("function", [""])[0]
if len(name) > int(charlimit):
name = name[:int(charlimit)] + "..."
if gene_name is not None:
labels.append(gene_name)
label_pos_list.append(label_pos)
else:
labels.append(name)
label_pos_list.append(label_pos)

f_cds_track.xticks(label_pos_list, labels, label_size=6, label_orientation="vertical")

# Plot xticks (interval = 10 Kb)
r_cds_track.xticks_by_interval(
10000, outer=False, label_formatter=lambda v: f"{v/1000:.1f} Kb"
)

circos.savefig(plotname + "_" + contigname + ".jpg", dpi=300)
circos.savefig(plotname + "_" + contigname + ".svg", dpi=200)
1 change: 1 addition & 0 deletions configs/container.config
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ process {
withLabel: virsorter { container = 'multifractal/virsorter:0.1.2' }
withLabel: phigaro { container = 'multifractal/phigaro:0.5.2' }
withLabel: pharokka { container = 'multifractal/pharokka:v1.7_seqkit' }
withLabel: phage_plot { container = 'nanozoo/pycirclize:3.12--48de646' }
withLabel: virsorter2 { container = 'papanikos/virsorter-2:2.2.1--fa935f8' }
withLabel: seeker { container = 'multifractal/seeker:0.1' }
withLabel: rmarkdown { container = 'nanozoo/rmarkdown:2.10--a3f4088' }
Expand Down
6 changes: 5 additions & 1 deletion workflows/phage_annotation_wf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,11 @@ workflow phage_annotation_wf {
//annotation via pharokka
pharokka(fasta)

pharokka_plotter(fasta,pharokka.out.pharokka_folder_ch,checkv)
plot_in= fasta
.join( pharokka.out)
.join( checkv)
plot_in.view()
pharokka_plotter(plot_in)

emit: annotationtable_markdown_input

Expand Down
39 changes: 30 additions & 9 deletions workflows/process/phage_annotation/pharokka.nf
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
process pharokka {
publishDir "${params.output}/${name}/pharokka", mode: 'copy'
errorStrategy 'ignore'
label 'pharokka'
input:
tuple val(name), path(fasta)
output:
path("*_pharokka_out"), emit: pharokka_folder_ch optional true
tuple val(name), path("*_pharokka_out"), emit: pharokka_folder_ch optional true
script:
"""
pharokka.py -i ${fasta} -o ${name}_pharokka_out -t 10 -d /pharokka_v1.4.0_databases -f -p ${name}
pharokka.py -i ${fasta} -o ${name}_pharokka_out -t 10 -d /pharokka_v1.4.0_databases -t ${task.cpus} -f -p ${name}
"""
stub:
Expand All @@ -21,21 +22,41 @@ process pharokka {

process pharokka_plotter {
publishDir "${params.output}/${name}/pharokka", mode: 'copy'
errorStrategy 'ignore'
label 'pharokka'
input:
tuple val(name), path(fasta)
path(pharokka_annotation_out)
tuple val(sec_name), path(checkv_results)
tuple val(name), path(fasta) , path(pharokka_annotation_out), path(checkv_results)
output:
tuple val(name), path("pharokka_plots"), emit: annotation_map_ch optional true
script:
"""
## split fasta to single contigs needed
## LC_ALL=C allow awk to use float numbers
LC_ALL=C awk '{if(\$9>${params.plot_completeness})print\$1}' < ${checkv_results} |tail -n+2 > tmp_contigs_to_plot.tsv
pharokka_plotter_parser.py --input ${pharokka_annotation_out}/${name}.gbk --contigs_to_extract tmp_contigs_to_plot.tsv
cat *gbk > selected_${name}.gbk
pharokka_multiplotter.py -g selected_${name}.gbk -o pharokka_plots --dpi 600 -f
LC_ALL=C awk '{if(\$9>${params.plot_completeness} && \$2>5000)print\$1}' < ${checkv_results} |tail -n+2 > tmp_contigs_to_plot_${name}.tsv
## remove hypoithetical proteins from plot
grep -v "name=hypothetical protein" ${pharokka_annotation_out}/${name}.gff > ${name}.gff
mkdir pharokka_plots
## check if tmp file is empty
if [ -s tmp_contigs_to_plot_${name}.tsv ]; then
# The file is not-empty.
## plot
while read LINE; do
phage_plot_preliminary.py --gff3 ${name}.gff --name "\$LINE"_plot --contig \$LINE --ring1 salmon --ring2 skyblue --limit 30
done < tmp_contigs_to_plot_${name}.tsv
else
# The file is empty.
touch pharokka_plots/nothing_to_plot.txt
fi
mv *.jpg pharokka_plots
mv *.svg pharokka_plots
##pharokka_plotter_parser.py --input ${pharokka_annotation_out}/${name}.gbk --contigs_to_extract tmp_contigs_to_plot.tsv
##cat *gbk > selected_${name}.gbk
##pharokka_multiplotter.py -g selected_${name}.gbk -o pharokka_plots --dpi 600 -f
"""
stub:
Expand Down

0 comments on commit d9d70f1

Please sign in to comment.