-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbedhunter.js
executable file
·243 lines (213 loc) · 8.29 KB
/
bedhunter.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
#! /usr/bin/env node
/* The dynamic heuristic loading throws ESLint for a loop. */
/* eslint-disable global-require */
var fs = require('fs');
var path = require('path');
var kijiji = require('kijiji-scraper');
var sqlite3 = require('sqlite3');
var config = require('./config.json');
/*
 * The list query is told not to scrape inner ads; details are fetched per ad
 * later on by scrapeAllUnscrapedAds.
 */
config.query.prefs.scrapeInnerAd = false;

/* Pick the notifier module named by the config, defaulting to "debug". */
var notificationSettings = config.notification;
if (notificationSettings.mode === undefined) {
    console.log('Warning: no notification mode defined. Falling back to "debug".');
    notificationSettings.mode = 'debug';
}
var notifierFactory = require(`./notifiers/${notificationSettings.mode}.js`);
var notifier = notifierFactory(notificationSettings);
/*
 * Registry of scoring heuristics, keyed by the name each module exports them
 * under. Every .js file in the "heuristics" directory next to this script is
 * loaded and its own exported properties are merged in; on a name clash the
 * file read later wins.
 */
var heuristics = {};
fs.readdirSync(path.join(__dirname, 'heuristics')).forEach(function (file) {
    /*
     * Compare the real extension. The previous /.js$/ pattern left the dot
     * unescaped, so it also accepted names such as "foo.mjs" or "notesjs".
     */
    if (path.extname(file) !== '.js') {
        return;
    }
    var heuristic = require('./heuristics/' + file);
    /* Own keys only, so nothing inherited from a prototype gets registered. */
    Object.keys(heuristic || {}).forEach(function (key) {
        heuristics[key] = heuristic[key];
    });
});
/*
 * Single SQLite handle shared by every step below.
 * NOTE(review): the file name is relative, so it presumably resolves against
 * the current working directory rather than this script's folder - confirm.
 */
var db = new sqlite3.Database('bedhunter.db');
/**
 * Create the `ad` and `score` tables if they are missing, then rebuild the
 * `heuristic` table so it lists exactly the heuristics currently loaded.
 *
 * Statements run strictly one after another. A failing statement is logged
 * and the sequence continues, so the returned promise always resolves.
 *
 * @returns {Promise<void>} resolves once every loaded heuristic name has been
 *     inserted (immediately after table creation when none are loaded).
 */
var defineSchema = function () {
    /* Run one statement; log a failure instead of dropping it silently. */
    const runStatement = function (sql) {
        return new Promise(function (resolve) {
            db.run(sql, function (err) {
                if (err) {
                    console.log(err);
                }
                resolve();
            });
        });
    };

    /* Insert one row per loaded heuristic name. */
    const insertHeuristics = function () {
        return new Promise(function (resolve) {
            const heuristicKeys = Object.keys(heuristics);
            const stmt = db.prepare('insert into heuristic (heuristic) values (?)');
            const finish = function () {
                console.log('Defined heuristics in database.');
                stmt.finalize();
                resolve();
            };

            /*
             * With nothing to insert no run-callback would ever fire, so the
             * promise has to be settled here or startup hangs forever.
             */
            if (heuristicKeys.length === 0) {
                finish();
                return;
            }

            let pending = heuristicKeys.length;
            heuristicKeys.forEach(function (heuristic) {
                stmt.run(heuristic, function (err) {
                    if (err) {
                        console.log(err);
                    }
                    pending -= 1;
                    if (pending === 0) {
                        finish();
                    }
                });
            });
        });
    };

    console.log('Defining database schema...');
    return runStatement('create table if not exists ad (link text primary key, summary json, details json, notified bool default 0)')
        .then(() => runStatement('create table if not exists score (link text, heuristic text, score double, foreign key (link) references ad (link))'))
        .then(() => runStatement('drop table if exists heuristic'))
        .then(() => runStatement('create table heuristic (heuristic text primary key)'))
        .then(insertHeuristics);
};
/**
 * Check Kijiji for new ads and store each ad's summary. Does not query into
 * them; details are filled in by a later step.
 *
 * Ads whose link is already stored are skipped by the database itself
 * ("insert or ignore"), so any error reported by an insert is a genuine
 * failure and is logged. Query or insert failures never reject: the promise
 * resolves so the polling cycle can carry on.
 *
 * @returns {Promise<void>} resolves once every returned ad has been handed to
 *     the database and the insert has reported back.
 */
var queryAdsIntoDatabase = function () {
    return new Promise(function (resolve) {
        console.log('Querying latest ads...');
        kijiji.query(config.query.prefs, config.query.params, function (err, ads) {
            /* Truthiness check: a success may arrive as null or undefined. */
            if (err) {
                console.log(err);
                resolve();
                return;
            }
            if (!ads || ads.length === 0) {
                console.log('Kijiji returned no ads.');
                resolve();
                return;
            }

            const stmt = db.prepare('insert or ignore into ad (link, summary) values (?, ?)');
            let pending = ads.length;
            ads.forEach(function (ad) {
                stmt.run(ad.link, JSON.stringify(ad), function (insertErr) {
                    if (insertErr) {
                        console.log(insertErr);
                    }
                    pending -= 1;
                    if (pending === 0) {
                        console.log('Retrieved latest ads.');
                        stmt.finalize();
                        resolve();
                    }
                });
            });
        });
    });
};
/**
 * Fetch the detail page of every stored ad that has no details yet and save
 * the result as JSON in the ad's `details` column.
 *
 * All scrapes are started at once. An ad whose scrape fails is logged and
 * left without details, so it is picked up again on the next cycle. Errors
 * never reject the promise.
 *
 * @returns {Promise<void>} resolves after every scrape has finished and every
 *     resulting update has reported back from the database.
 */
var scrapeAllUnscrapedAds = function () {
    return new Promise(function (resolve) {
        console.log('Retrieving ad details...');
        db.all('select link, summary from ad where details is null', function (err, ads) {
            if (err) {
                console.log(err);
                resolve();
                return;
            }
            if (ads.length === 0) {
                console.log('No ads without details.');
                resolve();
                return;
            }

            const stmt = db.prepare('update ad set details = ? where link = ?');
            let pending = ads.length;
            /* Called once per ad, whether it was saved or skipped. */
            const settleOne = function () {
                pending -= 1;
                if (pending === 0) {
                    console.log('Retrieved ad details.');
                    stmt.finalize();
                    resolve();
                }
            };

            ads.forEach(function (ad) {
                kijiji.scrape(ad.link, function (scrapeErr, details) {
                    if (scrapeErr) {
                        console.log(scrapeErr);
                        settleOne();
                        return;
                    }
                    /*
                     * Count the ad as done only when the update has reported
                     * back, so the next step never reads stale rows.
                     */
                    stmt.run(JSON.stringify(details), ad.link, function (updateErr) {
                        if (updateErr) {
                            console.log(updateErr);
                        }
                        settleOne();
                    });
                });
            });
        });
    });
};
/**
 * Run every registered heuristic against every detailed ad that does not yet
 * have a score for that heuristic, and store the resulting scores.
 *
 * Each heuristic is called as `heuristic(ad, callback)` where `ad` is the
 * stored summary with the stored details attached as `ad.innerAd`. A
 * heuristic that reports an error is logged and leaves no score, so that pair
 * is tried again on the next cycle. Errors never reject the promise.
 *
 * @returns {Promise<void>} resolves after every heuristic has called back and
 *     every resulting insert has reported back from the database.
 */
var scoreAllAdsMissingScores = function () {
    return new Promise(function (resolve) {
        console.log('Scoring ads...');
        db.all(`
            select heuristic.heuristic,
                   ad.summary,
                   ad.details
            from heuristic
            left join ad
            left join score on score.link = ad.link and score.heuristic = heuristic.heuristic
            where score.score is null
            and ad.details is not null
        `, function (err, rows) {
            if (err) {
                console.log(err);
                resolve();
                return;
            }
            if (rows.length === 0) {
                console.log('No unscored ads.');
                resolve();
                return;
            }

            const stmt = db.prepare('insert into score (link, heuristic, score) values (?, ?, ?)');
            let pending = rows.length;
            /* Called once per (ad, heuristic) row, stored or not. */
            const settleOne = function () {
                pending -= 1;
                if (pending === 0) {
                    console.log('Scored ads.');
                    stmt.finalize();
                    resolve();
                }
            };

            rows.forEach(function (row) {
                const ad = JSON.parse(row.summary);
                ad.innerAd = JSON.parse(row.details);
                heuristics[row.heuristic](ad, function (scoreErr, score) {
                    /* Truthiness check: success may arrive as null or undefined. */
                    if (scoreErr) {
                        console.log(scoreErr);
                        settleOne();
                        return;
                    }
                    /* Wait for the insert so the notification step sees it. */
                    stmt.run(ad.link, row.heuristic, score, function (insertErr) {
                        if (insertErr) {
                            console.log(insertErr);
                        }
                        settleOne();
                    });
                });
            });
        });
    });
};
/**
 * Notify about every not-yet-notified ad that has a score from each
 * registered heuristic and whose lowest score reaches
 * `config.notification.scoreThreshold`, then flag those ads as notified.
 *
 * The query yields one row per (ad, heuristic); rows are folded into one
 * object per ad of the form `{ link, title, price, image, scores }` before
 * the notifier is called. Missing detail fields become `undefined` instead of
 * throwing. If the notifier throws for an ad, the error is logged and the ad
 * is left unflagged so it is retried on the next cycle. Errors never reject
 * the promise.
 *
 * @returns {Promise<void>} resolves after every ad has been handled and every
 *     "notified" update has reported back from the database.
 */
var sendNotificationsForNewAds = function () {
    return new Promise(function (resolve) {
        console.log('Sending notifications for new ads...');
        db.all(`
            select *
            from (select ad.link,
                         ad.summary,
                         ad.details,
                         onescore.heuristic,
                         onescore.score,
                         min(allscore.score) as minscore,
                         count(allscore.score) as scorecount
                  from ad, score onescore, score allscore
                  where onescore.link = ad.link
                  and allscore.link = ad.link
                  and ad.notified = 0
                  group by ad.link, onescore.heuristic)
            where minscore >= ?
            and scorecount >= (select count(*) from heuristic)
        `, config.notification.scoreThreshold, function (err, rows) {
            if (err) {
                console.log(err);
                resolve();
                return;
            }
            if (rows.length === 0) {
                console.log('No unnotified ads.');
                resolve();
                return;
            }

            /* A Map keeps first-seen order and is safe for arbitrary link keys. */
            const ads = new Map();
            rows.forEach(function (row) {
                if (!ads.has(row.link)) {
                    /* Stored JSON may be "null" or lack `info`; fall back to empties. */
                    const summary = JSON.parse(row.summary) || {};
                    const details = JSON.parse(row.details) || {};
                    const info = details.info || {};
                    ads.set(row.link, {
                        link: row.link,
                        title: summary.title,
                        price: info.Price,
                        image: details.image,
                        scores: {}
                    });
                }
                ads.get(row.link).scores[row.heuristic] = row.score;
            });

            let pending = ads.size;
            /* Called once per ad, whether or not it was flagged. */
            const settleOne = function () {
                pending -= 1;
                if (pending === 0) {
                    console.log('Sent notifications.');
                    resolve();
                }
            };

            ads.forEach(function (ad) {
                try {
                    notifier(ad);
                } catch (notifyErr) {
                    console.log(notifyErr);
                    settleOne();
                    return;
                }
                db.run('update ad set notified = 1 where link = ?', ad.link, function (updateErr) {
                    if (updateErr) {
                        console.log(updateErr);
                    }
                    settleOne();
                });
            });
        });
    });
};
/**
 * Run one polling cycle - query, scrape, score, notify - and then schedule
 * the next cycle after `config.query.interval` milliseconds.
 *
 * A failure in any step is logged and skips the rest of that cycle, but the
 * next cycle is always scheduled; without the catch a single rejection would
 * stop polling for good.
 *
 * NOTE(review): inside this module the name shadows Node's global `process`
 * object; it is kept because the startup code refers to it by this name.
 */
var process = function () {
    queryAdsIntoDatabase()
        .then(() => scrapeAllUnscrapedAds())
        .then(() => scoreAllAdsMissingScores())
        .then(() => sendNotificationsForNewAds())
        .catch(function (err) {
            console.log(err);
        })
        .then(() => setTimeout(process, config.query.interval));
};
/* Entry point: make sure the schema exists, then start the polling loop. */
defineSchema().then(function () {
    return process();
});