From 0243c37f253c7134ae89c2fb9bf4373a3b8e20e6 Mon Sep 17 00:00:00 2001 From: AbdoullahBougataya Date: Fri, 2 Aug 2024 19:50:21 +0000 Subject: [PATCH] Fri, Aug 2, 2024, 10:50 PM +03:00 --- images_scrapper.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/images_scrapper.py b/images_scrapper.py index cfc5360..1a9a8ec 100644 --- a/images_scrapper.py +++ b/images_scrapper.py @@ -1,22 +1,26 @@ import requests from bs4 import BeautifulSoup +import scrapper -engine_name = "F-1" -goog_search = "https://www.google.com/search?sclient=psy-ab&client=ubuntu&hs=k5b&channel=fs&biw=1366&bih=648&noj=1&q=" + engine_name.replace(" ", "+") + "+rocket+engine+wikipedia" +titles, engines = scrapper() +for engine in engines: + engine_name = engine[0] + goog_search = "https://www.google.com/search?sclient=psy-ab&client=ubuntu&hs=k5b&channel=fs&biw=1366&bih=648&noj=1&q=" + engine_name.replace(" ", "+") + "+rocket+engine+wikipedia" -r = requests.get(goog_search) + r = requests.get(goog_search) -soup = BeautifulSoup(r.text, "html.parser") -search_result = soup.find_all("a") -for i in range(len(search_result)): - if "https://en.wikipedia.org/wiki/" in str(search_result[i]): - the_link = search_result[i] - break -if the_link: - wikipedia_link = str(the_link.get('href')).removeprefix("/url?q=").split("&")[0] - r = requests.get(wikipedia_link) soup = BeautifulSoup(r.text, "html.parser") - img_data = requests.get("https:" + str(soup.find_all("img")[4].get("srcset").split()[2])).content - with open(f'./images/{engine_name}.jpg', 'wb') as handler: - handler.write(img_data) + search_result = soup.find_all("a") + for i in range(len(search_result)): + if "https://en.wikipedia.org/wiki/" in str(search_result[i]): + the_link = search_result[i] + break + if the_link: + wikipedia_link = str(the_link.get('href')).removeprefix("/url?q=").split("&")[0] + r = requests.get(wikipedia_link) + soup = BeautifulSoup(r.text, "html.parser") + img_data = requests.get("https:" + str(soup.find_all("img")[4].get("srcset").split()[2])).content + if img_data: + with open(f'images/{engine_name}.jpg', 'wb') as handler: + handler.write(img_data)