-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathscraper.js
62 lines (55 loc) · 1.6 KB
/
scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
const scraper = require('immowelt-scraper');
const _ = require('underscore');
const store = require('./storage/store');
/**
 * Page-by-page scraper for the Immowelt listings of one city.
 *
 * Every fetched page's `items` are handed to `storage.addEntries`. Scraping
 * continues with the next page (after a random politeness delay) while the
 * page limit has not been reached and the store reports a positive count;
 * otherwise the run stops.
 *
 * @constructor
 * @param {*} scrapeTarget - City identifier, passed unchanged to `scraper.scrapCity`.
 * @param {Object} storage - Object exposing `addEntries(items)`; its return value
 *   is compared against 0 (presumably the number of newly added entries).
 */
module.exports.ImmoweltScraper = function(scrapeTarget, storage) {
  const DEFAULT_MAX_PAGE = 10;
  const MIN_DELAY_SECONDS = 5;
  const MAX_DELAY_SECONDS = 30;

  const city = scrapeTarget;
  const entryStore = storage;

  let maxPage = DEFAULT_MAX_PAGE;
  let currentPage = 1;
  let retryOnFail = true;
  let initialScrape = false;
  // Token of the run started by the most recent start() call. Callbacks that
  // belong to an older run compare against it and bail out, so two runs can
  // never advance the shared page counter at the same time.
  let activeRun = 0;
  // Handle of the timer waiting to fetch the next page (or retry), if any.
  let pendingTimer = null;

  /**
   * Starts a fresh run at page 1. A run that is still in progress is
   * superseded: its pending timer is cancelled and its late results ignored.
   *
   * @param {boolean} isInitialScrape - When truthy, the page limit is taken
   *   from the first result's `pagination.totalPages` instead of the default.
   */
  this.start = function(isInitialScrape) {
    console.log("Starting to scrape Immowelt");
    activeRun += 1;
    clearPendingTimer();
    currentPage = 1;
    maxPage = DEFAULT_MAX_PAGE;
    retryOnFail = true;
    initialScrape = isInitialScrape;
    scrapePage(currentPage, activeRun);
  };

  /** Cancels the timer for the next scheduled fetch, if one is pending. */
  function clearPendingTimer() {
    if (pendingTimer !== null) {
      clearTimeout(pendingTimer);
      pendingTimer = null;
    }
  }

  /** Ends the current run: disables retries and cancels any scheduled fetch. */
  function stop() {
    retryOnFail = false;
    clearPendingTimer();
    console.log("Scraping stopped");
  }

  /**
   * Fetches `page` after a random delay of 5 to 30 seconds. Used both for the
   * next page and for retries, so a failing site is never hit in a tight loop.
   */
  function scheduleScrape(page, run) {
    const delay = _.random(MIN_DELAY_SECONDS, MAX_DELAY_SECONDS) * 1000;
    console.log("Delaying " + delay / 1000 + " seconds.");
    pendingTimer = setTimeout(function() {
      pendingTimer = null;
      if (run === activeRun) {
        scrapePage(page, run);
      }
    }, delay);
  }

  /** Stores the page's items and decides whether to go on to the next page. */
  function onPromiseSuccess(result, run) {
    if (run !== activeRun) {
      return;
    }
    if (initialScrape) {
      maxPage = result['pagination']['totalPages'];
      initialScrape = false;
    }
    const addedCount = entryStore.addEntries(result['items']);
    if (currentPage < maxPage && addedCount > 0) {
      currentPage += 1;
      scheduleScrape(currentPage, run);
    } else {
      stop();
    }
  }

  /** Logs the failure and, while retries are enabled, re-schedules the same page. */
  function onPromiseReject(reason, run) {
    console.log(reason);
    if (run !== activeRun || !retryOnFail) {
      return;
    }
    scheduleScrape(currentPage, run);
  }

  /** Requests one page from the scraper and routes the outcome to the handlers. */
  function scrapePage(page, run) {
    console.log("Scraping page " + page);
    scraper.scrapCity(city, page)
      .then(
        function(result) { onPromiseSuccess(result, run); },
        function(reason) { onPromiseReject(reason, run); }
      )
      // The two-argument then() above does not catch errors thrown by the
      // handlers themselves (e.g. a result without `pagination`, or a failing
      // store). Re-fetching would not fix those, so log and end the run
      // instead of leaving an unhandled rejection behind.
      .catch(function(error) {
        console.log(error);
        if (run === activeRun) {
          stop();
        }
      });
  }
};