-
Notifications
You must be signed in to change notification settings - Fork 2
/
install.py
executable file
·93 lines (85 loc) · 5.21 KB
/
install.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import re
from defs import CRAWLTYPE_LINE as LINE, CRAWLTYPE_FILE as FILE
def _create_databank(name, reference, filelink, regex, crawltype,
parent_name=None):
"""
Create a databank entry
- a name
- a crawltype, either LINE to obtain entries from lines in a single file,
(like PDBFINDER) or FILE to presume that entries are files within a
directory (like PDB)
- a filelink, a macro for generating a link to a file, given it's pdbid,
part simply means: the 2nd and 3rd character of the pdbid
- a reference: link to a web page that explains the databank
- a regex: a pattern that filenames/lines should match to be included as an
entry.
- a parent name: name of an other databank that it depends on. This field
is not set for the root databank. Entries are considered missing if their
parent has a certain pdbid entry, but the child has not. Obsolete is the
opposite of missing.
"""
doc = {
'name': name,
'crawltype': crawltype,
'filelink': filelink,
'reference': reference,
'regex': regex
}
if parent_name:
doc['parent_name'] = parent_name
return doc
def create_databanks():
    """
    Build the documents for all databanks.

    Every document is made by _create_databank from a name, a reference
    page, a file link macro (using ${PDBID} and, for some databanks,
    ${PART}), a compiled pattern whose first group captures the four
    character pdbid, a crawl type (FILE or LINE) and the name of the parent
    databank. MMCIF is the only databank created without a parent.

    Besides the individually listed databanks, two databanks are created
    per WHAT IF list type (one with PDB and one with PDB_REDO as parent)
    and two scene databanks per entry in the scene name table, each of
    which has the matching WHAT IF list databank as parent.

    Returns the list of documents, in the order in which they are created.
    """
    docs = []

    docs.append(_create_databank(
        'MMCIF', 'http://www.wwpdb.org/',
        'ftp://ftp.wwpdb.org/pub/pdb/data/structures/divided/mmCIF/${PART}/${PDBID}.cif.gz',
        re.compile(r'.*/([\w]{4})\.cif(\.gz)?'), FILE))
    docs.append(_create_databank(
        'PDB', 'http://www.wwpdb.org/',
        'ftp://ftp.wwpdb.org/pub/pdb/data/structures/divided/pdb/${PART}/pdb${PDBID}.ent.gz',
        re.compile(r'.*/pdb([\w]{4})\.ent(\.gz)?'), FILE, 'MMCIF'))
    docs.append(_create_databank(
        'BDB', 'http://www.cmbi.umcn.nl/bdb/',
        'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/bdb/${PART}/${PDBID}/${PDBID}.bdb',
        re.compile(r'.*/([\w]{4})\.bdb'), FILE, 'PDB'))
    docs.append(_create_databank(
        'DSSP', 'http://swift.cmbi.umcn.nl/gv/dssp/',
        'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/dssp/${PDBID}.dssp',
        re.compile(r'.*/([\w]{4})\.dssp'), FILE, 'MMCIF'))
    # Both dots of the extension are escaped, so that only a literal
    # '.hssp.bz2' matches (an unescaped '.' matches any character).
    docs.append(_create_databank(
        'HSSP', 'http://swift.cmbi.umcn.nl/gv/hssp/',
        'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/hssp/${PDBID}.hssp.bz2',
        re.compile(r'.*/([\w]{4})\.hssp\.bz2'), FILE, 'DSSP'))
    docs.append(_create_databank(
        'PDBFINDER', 'http://swift.cmbi.umcn.nl/gv/pdbfinder/',
        'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/pdbfinder/PDBFIND.TXT.gz',
        re.compile(r'ID : ([\w]{4})'), LINE, 'PDB'))
    docs.append(_create_databank(
        'PDBFINDER2', 'http://swift.cmbi.umcn.nl/gv/pdbfinder/',
        'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/pdbfinder2/PDBFIND2.TXT.gz',
        re.compile(r'ID : ([\w]{4})'), LINE, 'PDBFINDER'))
    # Dots escaped here as well: only a literal '.mr.gz' extension matches.
    docs.append(_create_databank(
        'NMR', 'http://www.bmrb.wisc.edu/',
        'ftp://ftp.wwpdb.org/pub/pdb/data/structures/all/nmr_restraints/${PDBID}.mr.gz',
        re.compile(r'.*/([\w]{4})\.mr\.gz'), FILE, 'PDB'))
    docs.append(_create_databank(
        'STRUCTUREFACTORS', 'http://www.pdb.org/',
        'ftp://ftp.wwpdb.org/pub/pdb/data/structures/divided/structure_factors/${PART}/r${PDBID}sf.ent.gz',
        re.compile(r'.*/r([\w]{4})sf\.ent\.gz'), FILE, 'MMCIF'))
    docs.append(_create_databank(
        'PDBREPORT', 'http://swift.cmbi.umcn.nl/gv/pdbreport/',
        'http://www.cmbi.umcn.nl/pdbreport/cgi-bin/nonotes?PDBID=${PDBID}',
        re.compile(r'pdbreport\/\w{2}\/(\w{4})\/pdbout\.txt'), FILE, 'PDB'))
    docs.append(_create_databank(
        'PDB_REDO', 'https://pdb-redo.eu/',
        "https://pdb-redo.eu/db/${PDBID}/${PDBID}_final.pdb",
        re.compile(r'\/\w{2}\/\w{4}\/(\w{4})_final\.pdb'), FILE,
        'STRUCTUREFACTORS'))
    docs.append(_create_databank(
        'DSSP_REDO', 'http://swift.cmbi.umcn.nl/gv/dssp/',
        'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/dssp_redo/${PDBID}.dssp',
        re.compile(r'.*/([\w]{4})\.dssp'), FILE, 'PDB_REDO'))

    # WHAT IF lists: per list type one databank derived from PDB and one
    # derived from PDB_REDO. Both use the same filename pattern.
    for lis in ['dsp', 'iod', 'sbh', 'sbr', 'ss1', 'ss2', 'tau', 'acc', 'cal',
                'wat', 'cc1', 'cc2', 'cc3', 'chi']:
        list_pattern = re.compile(r'.*/([\w]{4})\.' + lis + r'(\.bz2)?$')
        docs.append(_create_databank(
            'WHATIF_PDB_%s' % lis, 'http://swift.cmbi.umcn.nl/whatif/',
            'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/wi-lists/pdb/%s/${PDBID}/${PDBID}.%s.bz2' % (lis, lis),
            list_pattern, FILE, 'PDB'))
        docs.append(_create_databank(
            'WHATIF_REDO_%s' % lis, 'http://swift.cmbi.umcn.nl/whatif/',
            'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/wi-lists/redo/%s/${PDBID}/${PDBID}.%s.bz2' % (lis, lis),
            list_pattern, FILE, 'PDB_REDO'))

    # Scenes: maps a WHAT IF list type to the scene name that is used in
    # the scene's filename. Each scene databank depends on the WHAT IF
    # list databank of the same list type.
    scenames = {'ss2': 'sym-contacts', 'iod': 'ion-sites'}
    for lis, scene in scenames.items():
        scene_pattern = re.compile(r'.*/([\w]{4})_' + scene + r'\.sce')
        docs.append(_create_databank(
            'PDB_SCENES_%s' % lis, 'http://www.cmbi.umcn.nl/pdb-vis/',
            'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/wi-lists/pdb/scenes/%s/${PDBID}/${PDBID}_%s.sce' % (lis, scene),
            scene_pattern, FILE, 'WHATIF_PDB_%s' % lis))
        docs.append(_create_databank(
            'REDO_SCENES_%s' % lis, 'http://www.cmbi.umcn.nl/pdb-vis/',
            'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/wi-lists/redo/scenes/%s/${PDBID}/${PDBID}_%s.sce' % (lis, scene),
            scene_pattern, FILE, 'WHATIF_REDO_%s' % lis))

    return docs