-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawler_drugbank_encymes.js
145 lines (130 loc) · 6.4 KB
/
crawler_drugbank_encymes.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
// Prepare a <script> element that points at the Google-hosted jQuery 1.x build.
// NOTE(review): nothing visible in this file appends `jq` to the document, so the
// script is never actually fetched here, and no visible code uses jQuery.
// Confirm whether this is still needed before relying on `$`.
var jq = document.createElement('script');
jq.src = "https://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js";
// Enzyme resource pages to crawl, listed in crawl order. Every entry is the
// shared base URI followed by a numeric enzyme id.
var urls = (function () {
    var base = "http://wifo5-04.informatik.uni-mannheim.de/drugbank/resource/enzymes/";
    var ids = [
        36, 46, 2, 26, 22, 43, 12, 16, 6, 19,
        49, 35, 20, 45, 39, 30, 29, 25, 1, 50,
        21, 44, 11, 9, 40, 5, 15, 18, 38, 48,
        34, 31, 51, 28, 41, 14, 4, 55, 8, 24,
        10, 7, 17, 37, 32, 47, 33, 42, 52, 23,
        27, 13, 3
    ];
    var list = [];
    for (var k = 0; k < ids.length; k += 1) {
        list.push(base + ids[k]);
    }
    return list;
})();
// URIs whose page answered "Service Temporarily Unavailable" (filled by loadTriples).
var missed_uris = [];
// Number of URLs in the initial list, captured before `urls` can be reassigned.
var urls_original_length = urls.length;
// All triple lines collected so far; printed by f() once the crawl is finished.
var text = "";
// Handle of the popup window that is currently open; assigned by load().
var ws;
/**
 * Scrapes the page currently loaded in the popup window `ws` and appends one
 * triple line per table row to the global `text`.
 *
 * Rows are taken from the elements with class "odd" and "even", interleaved
 * (odd[0], even[0], odd[1], even[1], ...) until either collection runs out.
 * If the first element of the page body reads "Service Temporarily
 * Unavailable", the popup is closed and the URI is pushed onto the global
 * `missed_uris` instead of being scraped.
 *
 * Any error while reading the page (popup missing, unexpected markup, access
 * denied by the browser) stops the scrape of this page; triples collected up
 * to that point are kept and the error is reported with console.warn.
 *
 * @param {number} i Index into the global `urls` array of the page that was opened.
 */
function loadTriples(i) {
    var uri = urls[i];

    // Builds the triple line for a single table row. Throws if the row does
    // not have the predicate/object cell structure this scraper expects.
    function rowToTriple(row) {
        var predicateCell = row.children[0];
        var objectCell = row.children[1];
        var predicate = predicateCell.children[0].title;
        // Strip every newline so that each triple stays on one output line
        // (a string pattern in replace() would only remove the first one).
        var object = objectCell.children[0].innerHTML.replace(/\n/g, "");
        if (object === "<") {
            // The first child is only an opening bracket; the value is the link next to it.
            object = objectCell.children[1].href;
        }
        if (predicate === "") {
            // No title on the first child: take the predicate from the link that
            // follows it and emit the triple with the page URI in object position.
            predicate = predicateCell.children[1].href;
            return "<" + object + "> <" + predicate + "> <" + uri + "> .\n";
        }
        return "<" + uri + "> <" + predicate + "> <" + object + "> .\n";
    }

    try {
        var doc = ws.document;
        if (doc.body.children[0].innerHTML === "Service Temporarily Unavailable") {
            // The server refused the request: remember the URI and give up on this page.
            ws.close();
            missed_uris.push(uri);
            return;
        }
        // The page loaded properly: walk the odd/even rows pairwise.
        var oddRows = doc.getElementsByClassName("odd");
        var evenRows = doc.getElementsByClassName("even");
        for (var counter = 0; counter < oddRows.length; counter += 1) {
            text += rowToTriple(oddRows[counter]);
            if (counter >= evenRows.length) {
                break;
            }
            text += rowToTriple(evenRows[counter]);
        }
    } catch (err) {
        // Best effort: keep what was collected so far, but do not hide the failure.
        console.warn("loadTriples: stopped reading " + uri + " early:", err);
    }
}
/**
 * Opens the i-th URL in a popup window and schedules the scrape of that page.
 *
 * The popup handle is stored in the global `ws`. loadTriples(i) is called
 * after a fixed 8000 ms delay.
 *
 * @param {number} i Index into the global `urls` array.
 */
function load(i) {
    // Progress output: index of the page that is being opened.
    console.log(i);
    var target = urls[i];
    ws = window.open(target);
    var scrapePage = function () {
        loadTriples(i);
    };
    setTimeout(scrapePage, 8000);
}
/**
 * Drives the crawl: closes the popup of the previous step, opens the page at
 * index `iter`, and re-schedules itself for the next index every 10000 ms.
 *
 * When the first pass over the initial list is finished, every URI collected
 * in the global `missed_uris` is appended to `urls` so that it is visited
 * exactly one more time; `missed_uris` is then reset and afterwards only holds
 * URIs that failed again during the retry. Once no index is left, the
 * collected `text` is printed.
 *
 * @param {number} iter Index into the global `urls` array of the page to open next.
 */
function f(iter) {
    if (iter > 0) {
        // Close the popup opened by the previous step.
        ws.close();
    }
    if (iter === urls_original_length && missed_uris.length > 0) {
        // First pass done (load() scrapes after 8 s, this runs after 10 s, so the
        // miss list is complete). Queue the refused URIs behind the original ones
        // so that `iter` keeps indexing `urls` correctly, and start a fresh miss
        // list so that a server that stays down cannot cause endless retries.
        urls = urls.concat(missed_uris);
        missed_uris = [];
    }
    if (iter < urls.length) {
        load(iter);
        setTimeout(function () { f(iter + 1); }, 10000);
    } else {
        if (missed_uris.length > 0) {
            console.warn("Still unavailable after retry:", missed_uris);
        }
        console.log(text);
    }
}
// Start the crawl with the first URL in the list.
f(0);