-
Notifications
You must be signed in to change notification settings - Fork 1
/
blast_to_apollo_gff.py
executable file
·88 lines (65 loc) · 1.96 KB
/
blast_to_apollo_gff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env python3
"""Convert a tabular BLAST file into Apollo-compatible GFF3.

Each BLAST hit is written as a ``match`` feature followed by a
``match_part`` feature. Subject coordinates are read from columns 9-10 and
the e-value from column 11 of the tab-separated input; the e-value is stored
in the GFF3 score column.
"""

import sys
from sys import argv
from argparse import ArgumentParser
from os.path import basename

name = 'blast_to_apollo_gff.py'
version = '0.2.1'
updated = '2023-05-31'

usage = f"""
NAME {name}
VERSION {version}
UPDATED {updated}
SYNOPSIS Converts tabular BLAST output files to Apollo compatible GFF3 format.
USAGE {name} \\
-b E_hellem_50604.tblastn.6 \\
-a E_hellem_50604.products
OPTIONS
-b (--blast) Tabular BLAST file
-a (--annots) Tab-separated locus-annotation file for the query used
-o (--output) Output file [Default: blast.gff3]
"""


def _load_products(annots_file):
    """Return a ``{locus: annotation}`` dict read from a tab-separated file.

    Only the first two columns are used. Lines with fewer than two columns
    (blank lines included) are skipped instead of aborting the run.
    """
    products = {}
    with open(annots_file, 'r') as handle:
        for line in handle:
            columns = line.rstrip("\r\n").split("\t")
            if len(columns) < 2:
                continue
            products[columns[0].strip()] = columns[1].strip()
    return products


def _source_label(blast_file):
    """Return the GFF3 source column derived from the BLAST file name.

    The second-to-last dot-separated field of the base name is used
    (``tblastn`` for ``E_hellem_50604.tblastn.6``). When the name has no
    such field, the generic label ``blast`` is returned.
    """
    parts = basename(blast_file).split(".")
    if len(parts) > 1 and parts[-2]:
        return parts[-2]
    return "blast"


def _convert(blast_file, output, products, source):
    """Write one match/match_part pair to ``output`` per hit in ``blast_file``."""
    with open(blast_file, 'r') as blast, open(output, 'w') as gff3:
        # Counted by hand so that skipped lines do not leave gaps in hit IDs
        match_num = 1
        for line in blast:
            line = line.strip()
            # Blank lines and '#' comment lines (outfmt 7 headers) carry no hit
            if not line or line.startswith("#"):
                continue

            data = line.split("\t")
            query = data[0]
            target = data[1]
            tstart, tend = int(data[8]), int(data[9])
            evalue = data[10]

            # Reversed subject coordinates mean a minus-strand hit;
            # start must not exceed end in the output, so swap them
            strand = '+'
            if tstart > tend:
                strand = '-'
                tstart, tend = tend, tstart

            product = products.get(query, "Unavailable")
            hit = f"hit_{match_num}"
            # NOTE(review): the label is written verbatim. GFF3 reserves
            # ';', '=', '&' and ',' inside attribute values, so annotations
            # containing them may need percent-encoding - confirm with Apollo.
            label = f"{query}:{product}"

            gff3.write(
                f"{target}\t{source}\tmatch\t{tstart}\t{tend}\t{evalue}\t{strand}\t.\t"
                f"ID={hit};Name={label};Note={label}\n"
            )
            gff3.write(
                f"{target}\t{source}\tmatch_part\t{tstart}\t{tend}\t{evalue}\t{strand}\t.\t"
                f"gene_id={hit};Parent={hit};transcript_id={hit}.t1;Note={label}\n"
            )
            match_num += 1


def main(args=None):
    """Run the converter.

    Parameters:
        args: optional list of command-line arguments (without the program
            name). Defaults to ``sys.argv[1:]``.

    With no arguments at all the usage text is printed and the program
    exits with status 0. Otherwise argparse enforces the required
    ``-b`` and ``-a`` options.
    """
    cli_args = argv[1:] if args is None else list(args)
    if not cli_args:
        print(f"\n{usage}")
        sys.exit()

    parser = ArgumentParser()
    parser.add_argument("-b", "--blast", required=True)
    parser.add_argument("-a", "--annots", required=True)
    parser.add_argument("-o", "--output", default="blast.gff3")
    options = parser.parse_args(cli_args)

    products = _load_products(options.annots)
    _convert(options.blast, options.output, products, _source_label(options.blast))


if __name__ == "__main__":
    main()