From a3debe3301bd4e8d32d59f8dee400290a1b5b14c Mon Sep 17 00:00:00 2001 From: serafettin Date: Sat, 25 May 2024 10:45:24 +0300 Subject: [PATCH] Update index.js --- index.js | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/index.js b/index.js index 657209e..9b84ee4 100644 --- a/index.js +++ b/index.js @@ -28,11 +28,27 @@ const initializeCrawler = async () => { minConcurrency: 5, maxConcurrency: 15, launchContext: { + launchOptions: { + defaultViewport: { + width: 1512, + height: 982, + }, + }, userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", }, requestHandler: async ({ request, page }) => { await page.waitForNetworkIdle(); + + await Promise.all([ + page + .evaluate(() => window.scrollBy(0, window.innerHeight)) + .then(() => page.waitForTimeout(152)), + page + .evaluate(() => window.scrollBy(0, window.innerHeight)) + .then(() => page.waitForTimeout(204)), + ]); + const content = await page.content(); console.log(`Title: ${await page.title()}`); console.log(`Content: ${content}`); @@ -49,7 +65,7 @@ const addToQueue = async (queue, url, uniqueKey) => { await queue.addRequests([{ url, uniqueKey }]); }; -const getContent = async (uniqueKey, maxRetries = 10, delay = 1000) => { +const getContent = async (uniqueKey, maxRetries = 20, delay = 1000) => { for (let i = 0; i < maxRetries; i++) { if (contentMap.has(uniqueKey)) { const content = contentMap.get(uniqueKey);