-
Notifications
You must be signed in to change notification settings - Fork 0
/
worker.js
55 lines (50 loc) · 1.41 KB
/
worker.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
const { parentPort } = require('worker_threads');
const config = require('./config');
const axios = require('axios');
const jsdom = require('jsdom');
const BluebirdPromise = require('bluebird');
const { JSDOM, VirtualConsole } = jsdom;
/**
 * Returns a promise that settles (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>}
 */
const sleep = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
// Every 'message' event received on parentPort is handed to processQueue.
// NOTE(review): processQueue treats the payload as a list of URLs — confirm
// the parent thread always posts an array.
parentPort.on('message', processQueue);
/**
 * Processes a batch of URLs, running at most two page loads at a time.
 *
 * A failure on one URL is logged and does not stop the rest of the batch:
 * a rejecting mapper would otherwise reject the whole `BluebirdPromise.map`
 * call and surface as an unhandled rejection.
 *
 * @param {string[]} urls - URLs to fetch and scan.
 * @returns {Promise<*>} Settles once every URL has been attempted; never
 *   rejects (unexpected batch-level errors are logged instead).
 */
function processQueue(urls) {
  // Isolate each URL so that one bad page cannot abort its siblings.
  const processSafely = async (url) => {
    try {
      await processQueueUrl(url);
    } catch (err) {
      console.log(`Error processing ${url}: ${err && err.message}`);
    }
  };

  return BluebirdPromise
    .map(urls, processSafely, { concurrency: 2 })
    .catch((err) => {
      // Reached only for batch-level problems, e.g. `urls` is not iterable.
      console.log(`Error processing queue: ${err && err.message}`);
    });
}
/**
 * Fetches one page, loads it into jsdom with script execution enabled, waits
 * a fixed interval, then posts the links found in the resulting document.
 *
 * @param {string} url - Address of the page to fetch and render.
 * @returns {Promise<void>} Resolves once the page has been handled. Resolves
 *   immediately, doing nothing, when the fetch produced no HTML.
 */
async function processQueueUrl(url) {
  const html = await requestUrl(url);
  if (!html) return;

  // NOTE(review): `runScripts: 'dangerously'` executes the fetched page's
  // scripts inside this process, and `resources: 'usable'` lets the page load
  // subresources. Only crawl sites you trust.
  const dom = new JSDOM(html, {
    beforeParse: (window) => {
      // Installed before any page script runs; presumably for scripts that
      // read performance.timing — confirm.
      window.performance.timing = {responseStart: new Date(), navigationStart: new Date()};
    },
    runScripts: 'dangerously',
    resources: 'usable',
    url,
    referrer: config.baseUrl,
    virtualConsole: new VirtualConsole(),
  });

  try {
    await sleep(10000); // HACK: Wait for dynamic JS to execute
    getUrlsFromDom(dom);
  } finally {
    // Close the window even when link extraction throws, so the page's timers
    // and pending resource loads are not left running.
    dom.window.close();
  }
}
/**
 * Reads the `href` of every `<a>` element in the document and posts the
 * resulting list to the parent port as a single message.
 *
 * @param {object} dom - Object exposing `window.document` (a JSDOM instance
 *   in this file).
 * @returns {void}
 */
function getUrlsFromDom(dom) {
  const { document } = dom.window;
  const hrefs = Array.from(
    document.querySelectorAll('a'),
    (anchor) => anchor.href,
  );
  parentPort.postMessage(hrefs);
}
/**
 * Downloads `url` with axios and returns the response body.
 *
 * Failures are logged rather than thrown, so callers must treat a missing
 * result as "skip this page".
 *
 * @param {string} url - Address to request.
 * @returns {Promise<*>} The response `data`, or `undefined` when the request
 *   failed.
 */
async function requestUrl(url) {
  try {
    console.log(`Requesting page ${url}`);
    const { data } = await axios.get(url);
    return data;
  } catch (e) {
    // When the server replied, the HTTP status is on e.response.status;
    // e.code is axios's error code string and is used only when there was no
    // response at all.
    const code = e.response ? e.response.status : e.code;
    console.log(`Error requesting ${url} Response code: ${code}`);
    return undefined;
  }
}