forked from Devs-Garden/WebCrawler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
server.js
42 lines (37 loc) · 1.28 KB
/
server.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
var Crawler = require("simplecrawler");
var crawler = new Crawler(process.argv[2]);
var fs = require("node-fs");
var path = require("path");
var url = require("url");
/**
 * Prints a one-line notice when the crawler emits "crawlstart".
 */
function announceCrawlStart() {
    console.log("Crawl starting");
}

crawler.on("crawlstart", announceCrawlStart);
/**
 * Logs the queue item handed over with each "fetchstart" event.
 *
 * @param {Object} queueItem - Queue entry supplied by the crawler; dumped as-is.
 */
function logFetchStart(queueItem) {
    console.log("fetchStart", queueItem);
}

crawler.on("fetchstart", logFetchStart);
/**
 * Maps the pathname of a crawled URL to a location inside the mirror directory.
 *
 * Directory-style pathnames ("/", "/blog/") carry no file name, so they are
 * stored as "index.html" inside that directory. The pathname is used verbatim
 * otherwise (no percent-decoding; the query string is not part of it).
 *
 * @param {string} outputDirectory - Normalized root directory of the mirror.
 * @param {string} pathname - Pathname component of the fetched URL.
 * @returns {?{dirname: string, filepath: string}} Target directory and file,
 *     or null when the pathname would resolve outside outputDirectory.
 */
function resolveOutputPath(outputDirectory, pathname) {
    var relative = pathname || "/";
    if (relative.charAt(relative.length - 1) === "/") {
        relative += "index.html";
    }

    // path.join collapses "." and ".." segments, so a simple prefix check is
    // enough to detect a pathname that climbs out of the mirror directory.
    var filepath = path.join(outputDirectory, relative);
    if (filepath.indexOf(outputDirectory + path.sep) !== 0) {
        return null;
    }

    return { dirname: path.dirname(filepath), filepath: filepath };
}

/**
 * Saves every fetched resource under "<script dir>/<start URL hostname>/",
 * mirroring the URL's path, and reports any filesystem error on stderr.
 *
 * @param {Object} queueItem - Queue entry from the crawler; only its `url` is read.
 * @param {Buffer|string} responseBuffer - Response body handed over by the
 *     crawler; written to disk unchanged.
 */
crawler.on("fetchcomplete", function(queueItem, responseBuffer) {
    var domain = url.parse(process.argv[2]).hostname;
    var outputDirectory = path.join(__dirname, domain);
    var target = resolveOutputPath(outputDirectory, url.parse(queueItem.url).pathname);

    if (!target) {
        console.error("Skipping " + queueItem.url + ": path escapes " + outputDirectory);
        return;
    }

    // Equivalent to the octal mode 0755, written without the legacy octal
    // literal, which is a syntax error in strict-mode code.
    var directoryMode = parseInt("755", 8);

    // node-fs's recursive mkdir creates whatever part of the path is missing,
    // so no separate existence check is needed (and none can race with a
    // concurrent fetch that targets the same directory).
    fs.mkdir(target.dirname, directoryMode, true, function(mkdirError) {
        if (mkdirError && mkdirError.code !== "EEXIST") {
            console.error("Could not create directory " + target.dirname + ":", mkdirError);
            return;
        }

        fs.writeFile(target.filepath, responseBuffer, function(writeError) {
            if (writeError) {
                console.error("Could not write " + target.filepath + ":", writeError);
            }
        });
    });
});
/**
 * Prints a one-line notice when the crawler emits "complete".
 */
function announceCompletion() {
    console.log("Finished!");
}

crawler.on("complete", announceCompletion);

// All listeners are registered; kick off the crawl.
crawler.start();