Skip to content

Commit

Permalink
Added API-related retries
Browse files — browse the repository at this point in the history
  • Loading branch information
kami4ka committed Dec 21, 2021
1 parent bcd9daf commit 080a170
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 156 deletions.
183 changes: 41 additions & 142 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@scrapingant/amazon-proxy-scraper",
"version": "2.0.2",
"version": "2.1.0",
"description": "Amazon products scraper by keyword with using ScrapingAnt API",
"main": "index.js",
"bin": {
Expand All @@ -21,11 +21,11 @@
"author": "ScrapingAnt",
"license": "MIT",
"dependencies": {
"@scrapingant/scrapingant-client": "0.0.1",
"cheerio": "^1.0.0-rc.5",
"@scrapingant/scrapingant-client": "0.1.0",
"cli-progress": "^3.9.0",
"json2csv": "^5.0.6",
"json2xls": "^0.1.2",
"promise-retry": "^2.0.1",
"yargs": "^16.2.0"
},
"devDependencies": {},
Expand Down
33 changes: 22 additions & 11 deletions products-scraper.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ const writeDataToCsv = require('./utils').writeDataToCsv;
const writeDataToXls = require('./utils').writeDataToXls;
const cliProgress = require('cli-progress');
const querystring = require('querystring');
const retry = require('promise-retry');
const ScrapingAntClient = require('@scrapingant/scrapingant-client')


const CONSTANTS = require('./constants');

class ProductsScraper {
Expand Down Expand Up @@ -131,15 +133,22 @@ class ProductsScraper {
...(this.currentSearchPage > 1 ? { page: this.currentSearchPage, ref: `sr_pg_${this.currentSearchPage}` } : {})
});

// Retry for avoiding empty or detected result from Amazon
for (let i = 0; i < CONSTANTS.limit.retry; i++) {
const response = await this.client.scrape(
`${this.host}/s?${queryParams}`,
{ proxy_country: this.country }
);
const pageBody = response.content;
const products = this.getProducts(pageBody);
if (Object.keys(products).length > 0) {
return products;
try {
// Retry for any network or accessibility cases
const response = await retry((attempt) => this.client.scrape(
`${this.host}/s?${queryParams}`,
{ proxy_country: this.country }
).catch(attempt), { retries: CONSTANTS.limit.retry });

const pageBody = response.content;
const products = this.getProducts(pageBody);
if (Object.keys(products).length > 0) {
return products;
}
} catch (err) {
console.error(`Failed to get page ${this.currentSearchPage} for keyword ${this.keyword}. Going to retry...`);
}
}

Expand Down Expand Up @@ -246,12 +255,14 @@ class ProductsScraper {
* The main idea of this method is pretty simple - amend existing products object with additional data
*/
async getProductPageData(amazonId) {
// Retry for avoiding empty or detected result from Amazon
for (let i = 0; i < CONSTANTS.limit.retry; i++) {
try {
const response = await this.client.scrape(
// Retry for any network or accessibility cases
const response = await retry((attempt) => this.client.scrape(
`${this.host}/dp/${amazonId}`,
{ proxy_country: this.country }
);
).catch(attempt), { retries: CONSTANTS.limit.retry });
const pageBody = response.content;

const dom = cheerio.load(pageBody.replace(/\s\s+/g, '').replace(/\n/g, ''));
Expand All @@ -270,7 +281,7 @@ class ProductsScraper {
}

} catch (exception) {
// Hiding the exception for retry
console.error(`Failed to get product ${amazonId} for keyword ${this.keyword}. Going to retry...`);
}
}
}
Expand Down

0 comments on commit 080a170

Please sign in to comment.