-
Notifications
You must be signed in to change notification settings - Fork 0
/
1_download_gwas_gwasatlas.py
83 lines (48 loc) · 1.36 KB
/
1_download_gwas_gwasatlas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python
# coding: utf-8
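# Regenerate this script from the notebook:
#     jupyter nbconvert 1_download_gwas_gwasatlas.ipynb --to script
#
# Usage (the two arguments are the start and end indices of the slice of the
# FTP file listing to download; the end index is exclusive):
#     python 1_download_gwas_gwasatlas.py 0 200
#
# Data sources:
#     http://geneatlas.roslin.ed.ac.uk/downloads/?traits=0
#     ftp://ftp.igmm.ed.ac.uk/pub/GeneATLAS/
#
# Alternative: mirror the whole FTP directory with
#     wget -r ftp://ftp.igmm.ed.ac.uk/pub/GeneATLAS/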
# In[ ]:
#html = urlopen('http://geneatlas.roslin.ed.ac.uk/downloads/?traits=0')
#html_read=html.read()
#soup = BeautifulSoup(html_read)
#table_list=pd.read_html(html_read)
# In[49]:
import sys
import os
from urllib.request import urlopen
#from bs4 import BeautifulSoup
#import pandas as pd
# In[21]:
from ftplib import FTP
# In[45]:
# Command-line arguments: START and END select the slice
# file_list[START:END] of the remote directory listing handled by this run.
if len(sys.argv) < 3:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit('usage: {} START END'.format(sys.argv[0]))
start = int(sys.argv[1])
end = int(sys.argv[2])

# Local directory that receives the downloaded files (trailing slash kept so
# the concatenated paths are identical to the ones used before).
DATA_DIR = '/data01/ch6845/GeneATLAS/data/'

# Use the FTP object as a context manager so the control connection is always
# shut down, even when a listing or transfer raises part-way through.
with FTP('ftp.igmm.ed.ac.uk') as ftp:
    ftp.login()
    ftp.cwd('pub/GeneATLAS')

    # Names in the remote directory, then the slice requested on the
    # command line.
    file_list = ftp.nlst()
    file_list_clip = file_list[start:end]

    for file in file_list_clip:
        path_temp = DATA_DIR + file + '.tmp'
        path = DATA_DIR + file

        # A file at its final path is treated as already downloaded, so the
        # script can be re-run to continue an interrupted session.
        if os.path.exists(path):
            print('{} already exists in {}'.format(file, path))
            continue

        print('{} is being fetched to {}'.format(file, path))
        # Download into a '.tmp' sibling and rename only after the transfer
        # has finished, so a partial download never appears under the final
        # name (and is therefore never skipped by the check above).
        with open(path_temp, 'wb') as localfile:
            ftp.retrbinary('RETR ' + file, localfile.write)
        os.rename(path_temp, path)
        print('Downloading {} finished'.format(file))