-
Notifications
You must be signed in to change notification settings - Fork 0
/
fetch_reference_data.py
executable file
·63 lines (50 loc) · 1.98 KB
/
fetch_reference_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python
import os
try:
from urllib.request import urlretrieve
except ImportError:
from urllib import urlretrieve
# Root URL of the reference-set download area; the release identifier and the
# file name are appended to it as further path components.
BASEURL = "https://orthology.benchmarkservice.org/refsets"


def get_file_list(release):
    """Return the download URLs of all reference files of a release.

    :param release: release identifier, e.g. ``"2011"`` or ``"2020.1"``.
        It is converted with ``str()`` first, so ``2011`` and ``"2011"``
        are treated the same.
    :returns: list of URLs of the form ``BASEURL/<release>/<file name>``.
    """
    # URLs always use forward slashes. os.path.join would insert the
    # platform separator (a backslash on Windows), so use posixpath.
    import posixpath

    # Normalise once so that the "2011" check and the URL agree.
    release = str(release)
    files = [
        'Summaries.drw.gz',
        'GOdata.drw.gz',
        'mapping.json.gz',
        'ServerIndexed.db',
        'ServerIndexed.db.map',
        'ServerIndexed.db.tree',
        'enzymes.drw.gz',
        'speciestree_Luca_conf81.drw',
        'speciestree_Euk_conf81.drw',
        'speciestree_Ver_conf81.drw',
        'speciestree_Fun_conf81.drw',
        'ReconciledTrees_SwissTrees.drw',
        'ReconciledTrees_TreeFam-A.drw',
        'TreeCat_Euk.drw',
        'TreeCat_Bac.drw',
        'TreeCat_Fun.drw',
    ]
    # Every release except "2011" lists these additional tree files.
    if release != "2011":
        files.extend([
            'lineage_tree.phyloxml',
            'species_tree_samples_Luca.nwk',
            'species_tree_samples_Ver.nwk',
            'species_tree_samples_Euk.nwk',
            'species_tree_samples_Fun.nwk',
        ])
    return [posixpath.join(BASEURL, release, f) for f in files]
def retrieve_files(files, target_dir):
    """Download every URL in *files* into the directory *target_dir*.

    Each file is stored under the last path component of its URL. The data
    is first written to ``<name>.part`` and renamed only after the download
    succeeded, so an interrupted transfer never leaves a truncated file
    under the final name.

    :param files: iterable of URLs to download.
    :param target_dir: destination directory; created (including parents)
        if it does not exist yet.
    :raises OSError: if *target_dir* cannot be created.
    :raises Exception: whatever ``urlretrieve`` raises for a failed
        download is propagated unchanged, after the partial file has been
        removed.
    """
    # EAFP instead of an exists() pre-check: avoids the race between check
    # and creation, and still works on Python 2 (no ``exist_ok`` there).
    try:
        os.makedirs(target_dir)
    except OSError:
        if not os.path.isdir(target_dir):
            raise

    for url in files:
        fname = os.path.basename(url)
        target = os.path.join(target_dir, fname)
        partial = target + ".part"
        try:
            urlretrieve(url, partial)
        except Exception:
            # Do not leave an incomplete download behind.
            if os.path.exists(partial):
                os.remove(partial)
            raise
        # os.rename refuses to overwrite an existing file on Windows, so
        # remove a previous copy first.
        if os.path.exists(target):
            os.remove(target)
        os.rename(partial, target)
if __name__ == "__main__":
    import argparse

    # Command-line entry point: parse the release (and optional output
    # directory), download all files of that release, then report where
    # they were stored.
    parser = argparse.ArgumentParser(
        description="Download reference data of a given release for the QfO benchmarking platform")
    parser.add_argument(
        'release',
        choices=("2011", "2018", "2019", "2020", "2020.1"),
        help="release version to download")
    parser.add_argument(
        '--out-dir',
        help="directory where to store the data. Defaults to ./reference_data/<release>")
    conf = parser.parse_args()

    # Without an explicit --out-dir, store the data in a per-release
    # sub-directory of ./reference_data.
    conf.out_dir = (
        os.path.join("reference_data", str(conf.release))
        if conf.out_dir is None
        else conf.out_dir
    )

    urls = get_file_list(conf.release)
    retrieve_files(urls, conf.out_dir)
    print("Finished downloading data for release {}. Stored in {}".format(conf.release, conf.out_dir))