Skip to content

Commit

Permalink
Fri, Aug 2, 2024, 10:50 PM +03:00
Browse files Browse the repository at this point in the history
  • Loading branch information
AbdoullahBougataya committed Aug 2, 2024
1 parent d6a93f8 commit 0243c37
Showing 1 changed file with 19 additions and 15 deletions.
34 changes: 19 additions & 15 deletions images_scrapper.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,26 @@
from pathlib import Path
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup

import scrapper

# Download one image per rocket engine: search Google for the engine's
# English-Wikipedia article, open the article, pick an image from it and save
# it as images/<engine name>.jpg.  Engines for which any step fails are
# skipped instead of aborting the whole run.

# Google search URL; the URL-encoded query text is appended after "q=".
GOOGLE_SEARCH_URL = (
    "https://www.google.com/search?sclient=psy-ab&client=ubuntu&hs=k5b"
    "&channel=fs&biw=1366&bih=648&noj=1&q="
)
WIKIPEDIA_PREFIX = "https://en.wikipedia.org/wiki/"
# Seconds to wait on each HTTP request; requests has no default timeout and
# would otherwise block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10
IMAGES_DIR = Path("images")
# The article image is taken from the <img> tag at this index.
# NOTE(review): presumably the first infobox picture on the page layout the
# author tested against -- confirm, this is fragile.
ARTICLE_IMAGE_INDEX = 4


def _find_wikipedia_link(engine_name):
    """Return the first English-Wikipedia URL in the Google results, or None.

    Raises:
        requests.RequestException: on network failure or an HTTP error status.
    """
    # quote_plus turns spaces into "+" (as the original replace() did) and
    # also escapes characters such as "&" that would corrupt the query string.
    query = quote_plus(f"{engine_name} rocket engine wikipedia")
    response = requests.get(GOOGLE_SEARCH_URL + query, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    anchor = next(
        (a for a in soup.find_all("a") if WIKIPEDIA_PREFIX in str(a)),
        None,
    )
    if anchor is None:
        return None
    href = anchor.get("href")
    if not href:
        return None
    # Result links look like "/url?q=<target>&<tracking params>"; keep only
    # the target URL.
    return str(href).removeprefix("/url?q=").split("&")[0]


def _find_image_url(article_url):
    """Return the URL of the article image to download, or None if absent.

    Raises:
        requests.RequestException: on network failure or an HTTP error status.
    """
    response = requests.get(article_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    images = soup.find_all("img")
    if len(images) <= ARTICLE_IMAGE_INDEX:
        return None
    srcset = images[ARTICLE_IMAGE_INDEX].get("srcset")
    if not srcset:
        return None
    # A srcset reads "<url> <descriptor>, <url> <descriptor>, ..."; the third
    # whitespace-separated token is therefore the second candidate's URL.
    tokens = srcset.split()
    if len(tokens) < 3:
        return None
    # The token is prefixed with "https:" as in the original code, i.e. it is
    # expected to be protocol-relative ("//host/path").
    return "https:" + tokens[2]


# NOTE(review): `import scrapper` binds the module object, and calling a
# module raises TypeError.  Confirm the intended entry point (for example a
# function inside the scrapper module) and adjust this call accordingly.
titles, engines = scrapper()

IMAGES_DIR.mkdir(parents=True, exist_ok=True)

for engine in engines:
    engine_name = engine[0]
    try:
        wikipedia_link = _find_wikipedia_link(engine_name)
        if wikipedia_link is None:
            # No article found: skip, rather than reusing the previous
            # engine's link as the original loop did.
            continue
        image_url = _find_image_url(wikipedia_link)
        if image_url is None:
            continue
        image_response = requests.get(image_url, timeout=REQUEST_TIMEOUT)
        image_response.raise_for_status()
    except requests.RequestException as err:
        print(f"Skipping {engine_name}: {err}")
        continue

    img_data = image_response.content
    if img_data:
        # A "/" in the engine name would otherwise be read as a directory.
        file_name = engine_name.replace("/", "_") + ".jpg"
        with open(IMAGES_DIR / file_name, "wb") as handler:
            handler.write(img_data)

0 comments on commit 0243c37

Please sign in to comment.