Skip to content

Commit

Permalink
feat: improve handling of queryVars.
Browse files: browse the repository at this point in the history
  • Loading branch information
lewisdonovan committed Oct 27, 2024
1 parent 54b89ec commit 05edb88
Show file tree
Hide file tree
Showing 11 changed files with 29 additions and 33 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ An object of additional query params to add to the Google News URL string, forma
const articles = await googleNewsScraper({
searchTerm: "Últimas noticias en Madrid",
queryVars: {
hl:"es-ES",
gl:"ES",
ceid:"ES:es"
},
Expand Down
11 changes: 5 additions & 6 deletions dist/cjs/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2725,14 +2725,14 @@ const googleNewsScraper = (userConfig) => __awaiter(void 0, void 0, void 0, func
limit: 99
}, userConfig);
const logger = getLogger(config.logLevel);
const queryVars = (_a = config.queryVars) !== null && _a !== void 0 ? _a : {};
const queryVars = config.queryVars
? Object.assign(Object.assign({}, config.queryVars), { when: config.timeframe }) : { when: config.timeframe };
if (userConfig.searchTerm) {
queryVars.q = userConfig.searchTerm;
}
const queryString = queryVars ? buildQueryString(queryVars) : '';
const queryString = (_a = buildQueryString(queryVars)) !== null && _a !== void 0 ? _a : '';
const baseUrl = (_b = config.baseUrl) !== null && _b !== void 0 ? _b : `https://news.google.com/search`;
const timeString = config.timeframe ? ` when:${config.timeframe}` : '';
const url = `${baseUrl}${queryString}${timeString}`;
const url = `${baseUrl}${queryString}`;
logger.info(`📰 SCRAPING NEWS FROM: ${url}`);
const requiredArgs = [
'--disable-extensions-except=/path/to/manifest/folder/',
Expand Down Expand Up @@ -2778,15 +2778,14 @@ const googleNewsScraper = (userConfig) => __awaiter(void 0, void 0, void 0, func
const $ = cheerio__namespace.load(content);
const articles = $('article');
let results = [];
$(articles).each(function () {
$(articles).each(function (i) {
var _a, _b, _c, _d, _e, _f, _g, _h, _j;
const link = ((_c = (_b = (_a = $(this)) === null || _a === void 0 ? void 0 : _a.find('a[href^="./article"]')) === null || _b === void 0 ? void 0 : _b.attr('href')) === null || _c === void 0 ? void 0 : _c.replace('./', 'https://news.google.com/')) || ((_f = (_e = (_d = $(this)) === null || _d === void 0 ? void 0 : _d.find('a[href^="./read"]')) === null || _e === void 0 ? void 0 : _e.attr('href')) === null || _f === void 0 ? void 0 : _f.replace('./', 'https://news.google.com/')) || "";
const srcset = (_g = $(this).find('figure').find('img').attr('srcset')) === null || _g === void 0 ? void 0 : _g.split(' ');
const image = srcset && srcset.length
? srcset[srcset.length - 2]
: $(this).find('figure').find('img').attr('src');
const articleType = getArticleType($(this));
// TODO: Done up to here
const title = getTitle($(this), articleType);
const mainArticle = {
title,
Expand Down
2 changes: 1 addition & 1 deletion dist/cjs/min/index.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/cjs/min/index.min.js.map

Large diffs are not rendered by default.

11 changes: 5 additions & 6 deletions dist/esm/index.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -2704,14 +2704,14 @@ const googleNewsScraper = (userConfig) => __awaiter(void 0, void 0, void 0, func
limit: 99
}, userConfig);
const logger = getLogger(config.logLevel);
const queryVars = (_a = config.queryVars) !== null && _a !== void 0 ? _a : {};
const queryVars = config.queryVars
? Object.assign(Object.assign({}, config.queryVars), { when: config.timeframe }) : { when: config.timeframe };
if (userConfig.searchTerm) {
queryVars.q = userConfig.searchTerm;
}
const queryString = queryVars ? buildQueryString(queryVars) : '';
const queryString = (_a = buildQueryString(queryVars)) !== null && _a !== void 0 ? _a : '';
const baseUrl = (_b = config.baseUrl) !== null && _b !== void 0 ? _b : `https://news.google.com/search`;
const timeString = config.timeframe ? ` when:${config.timeframe}` : '';
const url = `${baseUrl}${queryString}${timeString}`;
const url = `${baseUrl}${queryString}`;
logger.info(`📰 SCRAPING NEWS FROM: ${url}`);
const requiredArgs = [
'--disable-extensions-except=/path/to/manifest/folder/',
Expand Down Expand Up @@ -2757,15 +2757,14 @@ const googleNewsScraper = (userConfig) => __awaiter(void 0, void 0, void 0, func
const $ = cheerio.load(content);
const articles = $('article');
let results = [];
$(articles).each(function () {
$(articles).each(function (i) {
var _a, _b, _c, _d, _e, _f, _g, _h, _j;
const link = ((_c = (_b = (_a = $(this)) === null || _a === void 0 ? void 0 : _a.find('a[href^="./article"]')) === null || _b === void 0 ? void 0 : _b.attr('href')) === null || _c === void 0 ? void 0 : _c.replace('./', 'https://news.google.com/')) || ((_f = (_e = (_d = $(this)) === null || _d === void 0 ? void 0 : _d.find('a[href^="./read"]')) === null || _e === void 0 ? void 0 : _e.attr('href')) === null || _f === void 0 ? void 0 : _f.replace('./', 'https://news.google.com/')) || "";
const srcset = (_g = $(this).find('figure').find('img').attr('srcset')) === null || _g === void 0 ? void 0 : _g.split(' ');
const image = srcset && srcset.length
? srcset[srcset.length - 2]
: $(this).find('figure').find('img').attr('src');
const articleType = getArticleType($(this));
// TODO: Done up to here
const title = getTitle($(this), articleType);
const mainArticle = {
title,
Expand Down
2 changes: 1 addition & 1 deletion dist/esm/index.mjs.map

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/esm/min/index.min.mjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/esm/min/index.min.mjs.map

Large diffs are not rendered by default.

11 changes: 5 additions & 6 deletions dist/tsc/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ const googleNewsScraper = (userConfig) => __awaiter(void 0, void 0, void 0, func
limit: 99
}, userConfig);
const logger = getLogger(config.logLevel);
const queryVars = (_a = config.queryVars) !== null && _a !== void 0 ? _a : {};
const queryVars = config.queryVars
? Object.assign(Object.assign({}, config.queryVars), { when: config.timeframe }) : { when: config.timeframe };
if (userConfig.searchTerm) {
queryVars.q = userConfig.searchTerm;
}
const queryString = queryVars ? buildQueryString(queryVars) : '';
const queryString = (_a = buildQueryString(queryVars)) !== null && _a !== void 0 ? _a : '';
const baseUrl = (_b = config.baseUrl) !== null && _b !== void 0 ? _b : `https://news.google.com/search`;
const timeString = config.timeframe ? ` when:${config.timeframe}` : '';
const url = `${baseUrl}${queryString}${timeString}`;
const url = `${baseUrl}${queryString}`;
logger.info(`📰 SCRAPING NEWS FROM: ${url}`);
const requiredArgs = [
'--disable-extensions-except=/path/to/manifest/folder/',
Expand Down Expand Up @@ -83,7 +83,7 @@ const googleNewsScraper = (userConfig) => __awaiter(void 0, void 0, void 0, func
let results = [];
let i = 0;
const urlChecklist = [];
$(articles).each(function () {
$(articles).each(function (i) {
var _a, _b, _c, _d, _e, _f, _g, _h, _j;
const link = ((_c = (_b = (_a = $(this)) === null || _a === void 0 ? void 0 : _a.find('a[href^="./article"]')) === null || _b === void 0 ? void 0 : _b.attr('href')) === null || _c === void 0 ? void 0 : _c.replace('./', 'https://news.google.com/')) || ((_f = (_e = (_d = $(this)) === null || _d === void 0 ? void 0 : _d.find('a[href^="./read"]')) === null || _e === void 0 ? void 0 : _e.attr('href')) === null || _f === void 0 ? void 0 : _f.replace('./', 'https://news.google.com/')) || "";
link && urlChecklist.push(link);
Expand All @@ -92,7 +92,6 @@ const googleNewsScraper = (userConfig) => __awaiter(void 0, void 0, void 0, func
? srcset[srcset.length - 2]
: $(this).find('figure').find('img').attr('src');
const articleType = getArticleType($(this));
// TODO: Done up to here
const title = getTitle($(this), articleType);
const mainArticle = {
title,
Expand Down
2 changes: 1 addition & 1 deletion src/buildQueryString.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { QueryVars } from "./types"

const buildQueryString = (query: QueryVars) => {
const buildQueryString = ( query: QueryVars ) => {

// Bail if there's nothing in the Object
if (Object.keys(query).length === 0) return "";
Expand Down
16 changes: 8 additions & 8 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,16 @@ const googleNewsScraper = async (userConfig: GNSUserConfig) => {

const logger = getLogger(config.logLevel);

const queryVars: QueryVars = config.queryVars ?? {};
const queryVars: QueryVars = config.queryVars
? { ...config.queryVars, when: config.timeframe }
: { when: config.timeframe};
if (userConfig.searchTerm) {
queryVars.q = userConfig.searchTerm;
}

const queryString = queryVars ? buildQueryString(queryVars) : '';
const queryString = buildQueryString(queryVars) ?? '';
const baseUrl = config.baseUrl ?? `https://news.google.com/search`;
const timeString = config.timeframe ? ` when:${config.timeframe}` : '';
const url = `${baseUrl}${queryString}${timeString}`;
const url = `${baseUrl}${queryString}`;

logger.info(`📰 SCRAPING NEWS FROM: ${url}`);
const requiredArgs = [
Expand Down Expand Up @@ -89,18 +90,17 @@ const googleNewsScraper = async (userConfig: GNSUserConfig) => {

const articles = $('article');
let results: Articles = [];
let i = 0
const urlChecklist = []
let i = 0;
const urlChecklist = [];

$(articles).each(function () {
$(articles).each(function() {
const link = $(this)?.find('a[href^="./article"]')?.attr('href')?.replace('./', 'https://news.google.com/') || $(this)?.find('a[href^="./read"]')?.attr('href')?.replace('./', 'https://news.google.com/') || ""
link && urlChecklist.push(link);
const srcset = $(this).find('figure').find('img').attr('srcset')?.split(' ');
const image = srcset && srcset.length
? srcset[srcset.length - 2]
: $(this).find('figure').find('img').attr('src');
const articleType = getArticleType($(this));
// TODO: Done up to here

const title = getTitle($(this), articleType);
const mainArticle: Article = {
Expand Down

0 comments on commit 05edb88

Please sign in to comment.