-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathDivideFasta.py
executable file
·40 lines (33 loc) · 1.09 KB
/
DivideFasta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python
from Bio import Seq
from Bio import SeqIO
from Bio import SeqRecord
import sys
def chunk_bounds(seq_len, chunk_len, overlap):
    """Compute the windows used to cut a sequence into overlapping pieces.

    Window ``idx`` nominally covers ``[idx*chunk_len, (idx+1)*chunk_len)``.
    Every window after the first is extended to the left by ``overlap``
    bases, and the last window is truncated at ``seq_len``.

    Args:
        seq_len: length of the sequence being divided.
        chunk_len: nominal window length; must be positive.
        overlap: number of bases each window (except the first) reaches
            back into the previous one.

    Returns:
        A list of ``(start, end, ovp, idx)`` tuples, one per window, where
        ``ovp`` is the number of bases by which ``start`` was moved left of
        the nominal window start.

    Raises:
        ValueError: if ``chunk_len`` is not positive.
    """
    if chunk_len <= 0:
        raise ValueError("chunk length must be a positive integer")
    # Ceiling division done in integers, so no float rounding is involved.
    n_chunks = (seq_len + chunk_len - 1) // chunk_len
    bounds = []
    for idx in range(n_chunks):
        nominal_start = idx * chunk_len
        # Clamp at 0: a negative start would make the slice count from the
        # END of the sequence and silently return the wrong bases.
        start = max(0, nominal_start - overlap)
        end = min(nominal_start + chunk_len, seq_len)
        bounds.append((start, end, nominal_start - start, idx))
    return bounds


def wrap_sequence(sub, width=60):
    """Return ``sub`` broken into newline-separated lines of ``width`` chars.

    No leading or trailing newline is produced; an empty string yields an
    empty string.
    """
    return "\n".join(sub[i:i + width] for i in range(0, len(sub), width))


def format_record(seq_id, sub, start, end, ovp, idx):
    """Build the FASTA text for one window.

    The header is ``>seq_id start/end/ovp/idx``; the sequence follows,
    wrapped at 60 columns, and the text always ends with a newline so that
    the resulting files can be concatenated safely.
    """
    header = ">{0} {1}/{2}/{3}/{4}\n".format(seq_id, start, end, ovp, idx)
    body = wrap_sequence(sub)
    return (header + body + "\n") if body else header


def main(argv=None):
    """Split every record of a FASTA file into overlapping window files.

    Command line: ``DivideFasta.py in.fasta L overlap base``. For each
    window one file named ``base.<id>_<start>_<end>_<ovp>_<idx>.fasta`` is
    written.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 5:
        sys.exit("usage: {0} in.fasta L overlap base".format(argv[0]))
    in_path = argv[1]
    chunk_len = int(argv[2])
    overlap = int(argv[3])
    base = argv[4]

    # Context managers guarantee both handles are closed, even on error.
    with open(in_path) as in_file:
        for seq_rec in SeqIO.parse(in_file, "fasta"):
            seq = str(seq_rec.seq)
            for start, end, ovp, idx in chunk_bounds(len(seq), chunk_len, overlap):
                # NOTE: the record id is used verbatim in the file name (a
                # replace of "/" and "|" by "_" was previously commented out
                # here); an id containing a path separator will make open()
                # look for a subdirectory.
                out_name = "{0}.{1}_{2}_{3}_{4}_{5}.fasta".format(
                    base, seq_rec.id, start, end, ovp, idx)
                with open(out_name, "w") as out_file:
                    out_file.write(format_record(
                        seq_rec.id, seq[start:end], start, end, ovp, idx))


if __name__ == "__main__":
    main()