-
Notifications
You must be signed in to change notification settings - Fork 1
/
assign_chromosome_number.py
executable file
·92 lines (73 loc) · 2.1 KB
/
assign_chromosome_number.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env python3
name = "assign_chromosome_number.py"
version = "0.1.1"
updated = "2023-05-20"
usage = f"""
NAME {name}
VERSION {version}
UPDATED {updated}
SYNOPSIS Utilizes chromsome map generated by orient_fastas_to_reference.py to homogenize naming between the reference
and the new assembly.
USAGE {name} \\
-m all.map \\
-f 50507.oriented.fasta \\
-o 50507
OPTIONS
-m (--map) Chromosome map file
-f (--fasta) Assembly FASTA file
-o (--outdir) Output directory [Default = CHROMOSOME_ASSIGNMENT]
"""
from sys import argv
if len(argv) < 2:
print(f"\n{usage}")
exit()
from argparse import ArgumentParser
from os import makedirs
from os.path import isdir,basename
from textwrap import wrap
GetOptions = ArgumentParser()
GetOptions.add_argument("-m","--map",required=True)
GetOptions.add_argument("-f","--fasta",required=True)
GetOptions.add_argument("-o","--outdir",default="CHROMOSOME_ASSIGNMENT")
args = GetOptions.parse_args()
map_file = args.map
fasta_file = args.fasta
outdir = args.outdir
if not isdir(outdir):
makedirs(outdir,mode=0o755)
filename = basename(fasta_file).split(".")[0]
FASTA = open(fasta_file,'r')
locus = ""
contigs = {}
for line in FASTA:
line = line.strip()
if line[0] == ">":
locus = line[1:]
contigs[locus] = ""
else:
contigs[locus] += line
FASTA.close()
MAP = open(map_file,'r')
mappings = {}
mapped_to = ""
for line in MAP:
line = line.strip()
if line != "":
if line[0:2] == ">>":
mapped_to = line[2:].split("\t")[0]
mappings[mapped_to] = []
elif line[0] == ">":
mapped,type = line[1:].split("\t")[0:2]
if type == "Primary":
mappings[mapped_to].append(mapped)
MAP.close()
ASSIGNED_FASTA = open(f"{outdir}/{filename}.assigned.fasta",'w')
ASSIGNMENTS = open(f"{outdir}/{filename}.chromosome_assignments",'w')
for chromosome in sorted(mappings.keys()):
for index,contig in enumerate(mappings[chromosome]):
ASSIGNED_FASTA.write(f">{chromosome}s{index+1}\n")
sequence = "\n".join(wrap(contigs[contig],60))
ASSIGNED_FASTA.write(f"{sequence}\n")
ASSIGNMENTS.write(f"{contig} => {chromosome}s{index+1}\n")
ASSIGNED_FASTA.close()
ASSIGNMENTS.close()