-
Notifications
You must be signed in to change notification settings - Fork 7
/
extract_genbank_information.py
78 lines (61 loc) · 1.79 KB
/
extract_genbank_information.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import sys
import json
import argparse
from itertools import chain
try:
from Bio import SeqIO
except ImportError:
print('BioPython not installed!')
print('Please visit https://biopython.org/wiki/Download for instructions.')
sys.exit(1)
# Command-line interface: the script takes one GenBank file in and writes one
# JSON file out; both paths are mandatory.
parser = argparse.ArgumentParser(
    description='Write GenBank information to a JSON file for alignment.js.'
)
# Path of the GenBank file to read.
parser.add_argument(
    '-i', '--input', metavar='INPUT', type=str, required=True,
    help='Input GenBank file'
)
# Path of the JSON file to write.
parser.add_argument(
    '-o', '--output', metavar='OUTPUT', type=str, required=True,
    help='Output JSON file'
)
# Parsed eagerly at module level; the script body below reads args.input and
# args.output.
args = parser.parse_args()
def extract_site_information(feature):
    """Expand a site feature into one entry per position it covers.

    Each part in ``feature.location.parts`` is iterated, and every element it
    yields produces a dict with three keys: ``name`` (the first value of the
    ``site_type`` qualifier), ``site`` (the yielded element itself) and
    ``type`` (always ``'site'``).

    Returns a flat list of those dicts, ordered by part and then by position
    within the part. A feature with no parts gives an empty list.
    """
    return [
        {
            'name': feature.qualifiers['site_type'][0],
            'site': position,
            'type': 'site',
        }
        for segment in feature.location.parts
        for position in segment
    ]
def extract_region_information(feature):
    """Summarise a region feature as a single dict.

    The result has four keys: ``name`` (the first value of the
    ``region_name`` qualifier), ``start`` and ``end`` (the location's
    ``start`` and ``end`` converted with ``int``) and ``type`` (always
    ``'region'``).
    """
    label = feature.qualifiers['region_name'][0]
    bounds = feature.location
    region = {'name': label}
    # int() turns the location's position values into plain integers.
    region['start'] = int(bounds.start)
    region['end'] = int(bounds.end)
    region['type'] = 'region'
    return region
def extract_functional_information(features):
    """Collect site and region annotations from an iterable of features.

    A feature whose ``type`` is ``'Site'`` contributes every entry returned
    by ``extract_site_information``. A feature whose ``type`` is ``'Region'``
    contributes the single entry returned by ``extract_region_information``.
    Features of any other type are skipped.

    Returns one flat list of entries in the order the features were given;
    the list is empty when nothing matches.
    """
    all_information = []
    for feature in features:
        if feature.type == 'Site':
            # Extending in place keeps the result flat without the quadratic
            # list copying that sum(list_of_lists, []) performs.
            all_information.extend(extract_site_information(feature))
        elif feature.type == 'Region':
            all_information.append(extract_region_information(feature))
    return all_information
# Only the first record of the input file is used; any later records are
# ignored.
record = next(SeqIO.parse(args.input, 'genbank'), None)
if record is None:
    # If the parser yields no records, stop with a readable message instead
    # of an uncaught StopIteration traceback.
    print('No GenBank record found in {}'.format(args.input))
    sys.exit(1)
information = extract_functional_information(record.features)
# Write the flat list of site/region entries as indented JSON.
with open(args.output, 'w') as json_file:
    json.dump(information, json_file, indent=2)