-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetch.js
84 lines (78 loc) · 2.46 KB
/
fetch.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
var fs = require('fs');
var htmlparser = require('htmlparser');
var log = require('./log');
var needle = require('needle');
var path = require('path');
var StatsD = require('node-statsd');
var util = require('util');
// Shared StatsD client for this module. Every metric name is prefixed with
// 'price-for-asin.fetch.'; the target host comes from the STATSD_HOST
// environment variable and falls back to localhost when that is unset/empty.
var statsd = new StatsD({
  host: process.env.STATSD_HOST || 'localhost',
  prefix: 'price-for-asin.fetch.'
});
/**
 * Build the Amazon UK mobile-site search URL for an ASIN.
 *
 * The ASIN is percent-encoded before being placed in the `k` query parameter
 * so that reserved characters (`&`, `=`, `#`, spaces, ...) cannot alter the
 * structure of the URL. Plain alphanumeric ASINs are passed through unchanged.
 *
 * @param {string} asin - Product identifier to search for.
 * @returns {string} Absolute URL of the search page.
 */
function constructUrl(asin) {
  // mobile site
  return "http://www.amazon.co.uk/gp/aw/s/ref=is_box_?k=" + encodeURIComponent(asin);
}
/**
 * Save a page's HTML to a timestamped file under `out/` for later inspection,
 * then hand the triggering error on to the caller.
 *
 * A failure to write the dump is only logged; `callback` always receives the
 * original `error`, never the write error.
 *
 * @param {string} html - Page content to write to disk.
 * @param {Error} error - The error that caused the dump; forwarded to callback.
 * @param {function(Error)} callback - Invoked once the write attempt finishes.
 */
function dumpHtml(html, error, callback) {
  statsd.increment('html_dumped');

  var timestamp = new Date().toISOString();
  var dumpPath = path.join('out', timestamp + '_dump.html');

  function onWriteFinished(writeErr) {
    log.info({filename: dumpPath}, 'Write HTML dump file');
    if (writeErr) {
      log.error({filename: dumpPath, err: writeErr}, 'Error writing HTML dump file');
    }
    // Report the original failure regardless of how the dump went.
    callback(error);
  }

  fs.writeFile(dumpPath, html, onWriteFinished);
}
/**
 * Parse a search-results page and extract the item's price in pounds.
 *
 * The price text is taken from the first child of the third <b> element found
 * in the parsed DOM and must begin with a '£' sign. Whenever the page cannot
 * be parsed or does not have the expected shape, the HTML is dumped to disk
 * via dumpHtml() and the error is passed to `callback`.
 *
 * `callback` is invoked exactly once per call.
 *
 * @param {string} html - Raw HTML of the page to parse.
 * @param {function(?Error, {price: number, currency: string}=)} callback -
 *   Receives an error, or `null` and the parsed result.
 */
function parsePage(html, callback) {
  statsd.increment('pages_parsed');
  var handler = new htmlparser.DefaultHandler(function (error, dom) {
    if (error) {
      return dumpHtml(html, error, callback);
    }
    if (!dom) {
      return dumpHtml(html, new Error("No DOM object!"), callback);
    }

    var priceText;
    try {
      var bolds = htmlparser.DomUtils.getElementsByTagName("b", dom);
      // Throws a TypeError when there are fewer than three <b> elements or
      // the third one has no children; that is reported like any other
      // unexpected page shape.
      priceText = bolds[2].children[0].data;
    } catch (e) {
      return dumpHtml(html, e, callback);
    }

    // Guard against a node without text data so the indexing/slicing below
    // cannot throw outside the try block and bypass the callback.
    if (typeof priceText !== 'string') {
      return dumpHtml(html, new Error("Expected price text to be a string; got " + typeof priceText), callback);
    }
    if (priceText[0] !== '£') {
      return dumpHtml(html, new Error("Expected currency symbol to be £; got " + priceText[0]), callback);
    }

    // Strip thousands separators first: parseFloat stops at the first comma,
    // so "1,299.00" would otherwise be read as 1.
    var price = parseFloat(priceText.slice(1).replace(/,/g, ''));
    if (isNaN(price)) {
      var errMsg = "Failed to convert price string to float; result is NaN";
      // Log the raw text rather than the NaN it turned into.
      log.error({price: priceText}, errMsg);
      // Return here so the success callback below is not also invoked.
      return callback(new Error(errMsg));
    }

    callback(null, {
      price: price,
      currency: "GBP"
    });
  });
  var parser = new htmlparser.Parser(handler);
  parser.parseComplete(html);
}
/**
 * Download the search page for an ASIN and extract its price.
 *
 * Issues a GET request (following up to 5 redirects) to the URL built by
 * constructUrl(). Transport errors and any status code other than 200 are
 * reported to `callback` as an error; a 200 response body is handed to
 * parsePage(), which then owns the callback.
 *
 * @param {string} asin - Product identifier to look up.
 * @param {function(?Error, Object=)} callback - Receives an error, or the
 *   result produced by parsePage().
 */
function fetchPriceForAsin(asin, callback) {
  statsd.increment('requests_made');
  log.info({asin: asin}, "fetchPriceForAsin");

  var requestOptions = { follow_max: 5 };
  var url = constructUrl(asin);

  function onResponse(err, response) {
    if (err) {
      return callback(err);
    }

    var status = response.statusCode;
    if (status == 200) {
      parsePage(response.body, callback);
      return;
    }

    log.error({status_code: status }, util.format("ERROR: status code %s", status));
    log.debug({body: response.body, headers: response.headers}, "Response for unsuccessful request");
    return callback(new Error("Expected status code 200; got " + status));
  }

  needle.get(url, requestOptions, onResponse);
}
// Public API of this module: fetchPriceForAsin is the only function exported;
// the other functions in this file stay module-private.
module.exports = {
fetchPriceForAsin: fetchPriceForAsin
};