-
Notifications
You must be signed in to change notification settings - Fork 1
/
create_dir.py
80 lines (72 loc) · 2.76 KB
/
create_dir.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import pprint
import subprocess
import threading
def chunks(l, n):
    """Yield successive n-sized chunks from l.

    l must support len() and slicing (for example a list or a string);
    n is the maximum length of each chunk.  The last chunk is shorter when
    len(l) is not a multiple of n, and an empty l yields nothing.

    Raises ValueError when n is smaller than 1.  Because this is a
    generator, the error surfaces on the first iteration.  Without the
    check, n == 0 fails inside range() with an unrelated message and a
    negative n silently produces no chunks at all.
    """
    if n < 1:
        raise ValueError("chunk size must be >= 1, got %r" % (n,))
    for start in range(0, len(l), n):
        yield l[start:start + n]
def _is_wanted_archive(filename):
    """Return True if filename is one of the archives myfunc copies.

    Wanted names contain ".gff.gz" or ".faa.gz", or contain
    "_genomic.fna.gz" without also containing "from".
    """
    if ".gff.gz" in filename or ".faa.gz" in filename:
        return True
    return "_genomic.fna.gz" in filename and "from" not in filename


def _fasta_record_ids(fasta_path):
    """Yield the first whitespace-delimited token of each '>' header line.

    The file is streamed line by line instead of being loaded whole.
    Header lines with nothing after the '>' are skipped.
    """
    with open(fasta_path) as handle:
        for line in handle:
            if not line.startswith(">"):
                continue
            tokens = line[1:].split()
            if tokens:
                yield tokens[0]


def myfunc(number):
    """Prepare anti-CRISPR data for one chunk of directories.

    The directory names to process are the symmetric difference between
    the listings of the "bacteria-complete" and "bacteria-complete-filter1"
    trees, split into chunks of 300; number selects which chunk this call
    handles (IndexError if there is no such chunk).

    For every directory in the chunk:
      * a same-named directory is created under "bacteria-complete-filter2";
      * the .gff.gz, .faa.gz and genomic .fna.gz archives are copied there
        and decompressed with "gzip -d";
      * for every record id found in a decompressed .fna file,
        phaster.ca/submissions/<id>.zip is downloaded into that directory
        with wget.  The path of any zip that is still missing afterwards
        is appended, one per line, to log3.txt.

    External tools (cp, gzip, wget) are run through subprocess with
    argument lists; OSError is raised if a tool cannot be launched.
    Their exit codes are not checked.
    """
    mydir = "/var/www/html/crisper/bacteria-complete"
    nwdir = "/var/www/html/crisper/bacteria-complete-filter2"
    nwdir_1 = "/var/www/html/crisper/bacteria-complete-filter1"
    log_path = "/var/www/html/crisper_script/log3.txt"

    # Sorting fixes the order so that every call slices the same names into
    # the same chunks; the number of chunks is unaffected.
    pending = sorted(set(os.listdir(mydir)) ^ set(os.listdir(nwdir_1)))
    batch = list(chunks(pending, 300))[number]

    # Append mode: this function runs in several threads at once, and
    # opening with "w" would make each of them truncate the others' entries.
    with open(log_path, "a") as log:
        for onedir in batch:
            source_dir = mydir + '/' + onedir
            target_dir = nwdir + '/' + onedir
            if not os.path.exists(target_dir):
                os.makedirs(target_dir, 0o777)

            for root, _subdirs, files in os.walk(source_dir):
                for name in files:
                    if not _is_wanted_archive(name):
                        continue

                    # os.walk descends into sub-directories, so the file
                    # lives under root, not necessarily under source_dir.
                    subprocess.call(
                        ["cp", os.path.join(root, name), target_dir])
                    subprocess.call(
                        ["gzip", "-d", os.path.join(target_dir, name)])

                    if "fna" not in name:
                        continue

                    # Drop the trailing ".gz" to get the decompressed name.
                    fasta_path = os.path.join(target_dir, name[:-3])
                    for refseq in _fasta_record_ids(fasta_path):
                        # cwd= is applied to the child process only; the
                        # working directory of this process is shared by
                        # all threads and must not be changed here.
                        subprocess.call(
                            ["wget",
                             "phaster.ca/submissions/" + refseq + ".zip"],
                            cwd=target_dir)
                        zip_path = target_dir + '/' + refseq + ".zip"
                        if not os.path.exists(zip_path):
                            log.write(zip_path + "\n")
# Module-level setup: work out how many 300-name chunks there are and build
# one worker thread per chunk.  The two listed trees are compared with a
# symmetric difference, the same rule myfunc applies, so the chunk indexes
# handed to the workers line up with what each worker computes for itself.
mydir = "/var/www/html/crisper/bacteria-complete"
nwdir_1 = "/var/www/html/crisper/bacteria-complete-filter1"
nwdir = "/var/www/html/crisper/bacteria-complete-filter2"
i = 0

oldDirs = os.listdir(mydir)
nwdir_1s = os.listdir(nwdir_1)
Dirs = list(set(oldDirs) ^ set(nwdir_1s))
Dir_arr = list(chunks(Dirs, 300))
Dir_len = len(Dir_arr)

# Each thread runs myfunc with the index of the chunk it is responsible for.
threads = [
    threading.Thread(target=myfunc, args=(chunk_index,))
    for chunk_index in range(Dir_len)
]

if __name__ == '__main__':
    # Start every worker before waiting on any of them so they run
    # concurrently, then block until all have finished.
    for worker in threads:
        worker.start()
    for worker in threads:
        worker.join()