From e972e95298418f19fc41786b5544fe9a0085ad1b Mon Sep 17 00:00:00 2001
From: GabrielRezeanu03 <99530668+GabrielRezeanu03@users.noreply.github.com>
Date: Mon, 3 Jun 2024 22:34:29 +0300
Subject: [PATCH] Added two new scraper scripts for Alumil and Fieldstar

---
 sites/alumil.py    | 54 ++++++++++++++++++++++++++++++++++++++++++
 sites/fieldstar.py | 59 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+)
 create mode 100644 sites/alumil.py
 create mode 100644 sites/fieldstar.py

diff --git a/sites/alumil.py b/sites/alumil.py
new file mode 100644
index 0000000..4770c0c
--- /dev/null
+++ b/sites/alumil.py
@@ -0,0 +1,54 @@
+from sites.website_scraper_bs4 import BS4Scraper
+
+
+class alumilScraper(BS4Scraper):
+    """
+    A class for scraping job data from alumil website.
+    """
+    url = 'https://www.alumil.com/romania/corporate/careers/job-openings'  # URL of the careers page
+    url_logo = 'https://www.alumil.com/ResourcePackages/Alumil/assets/dist/images/logo.svg'
+    company_name = 'alumil'
+
+    def __init__(self):
+        """Initialize the BS4Scraper base with the company name and logo URL."""
+        super().__init__(self.company_name, self.url_logo)
+
+    def get_response(self):
+        """Load the careers page by passing ``url`` to the inherited ``get_content``."""
+        self.get_content(self.url)
+
+    def scrape_jobs(self):
+        """
+        Look up the job title and link elements, store the extracted titles and links, then run ``format_data``.
+        """
+        job_title_elements = self.get_jobs_elements('class_', "article-title")
+        job_link_elements = self.get_jobs_elements('css_', "div.col-xs-1.article-arrow > a")
+
+        self.job_titles = self.get_jobs_details_text(job_title_elements)
+        self.job_links = self.get_jobs_details_href(job_link_elements)
+
+        self.format_data()
+
+    def sent_to_future(self):
+        """Call the inherited ``send_to_viitor`` (presumably uploads the jobs; see BS4Scraper)."""
+        self.send_to_viitor()
+
+    def return_data(self):
+        """Fetch and scrape, then return ``(formatted_data, company_name)``."""
+        self.get_response()
+        self.scrape_jobs()
+        return self.formatted_data, self.company_name
+
+    def format_data(self):
+        """Create one job entry per (title, link) pair, all located in București."""
+        # zip() stops at the shorter list, so unmatched titles/links are dropped.
+        for job_title, job_link in zip(self.job_titles, self.job_links):
+            job_city = "București"  # The company's city is hardcoded -> București
+            self.create_jobs_dict(job_title, job_link, "România", [job_city])
+
+
+if __name__ == "__main__":
+    alumil = alumilScraper()
+    alumil.get_response()
+    alumil.scrape_jobs()
+    alumil.sent_to_future()
diff --git a/sites/fieldstar.py b/sites/fieldstar.py
new file mode 100644
index 0000000..83e9529
--- /dev/null
+++ b/sites/fieldstar.py
@@ -0,0 +1,59 @@
+from sites.website_scraper_bs4 import BS4Scraper
+
+
+class fieldstarScraper(BS4Scraper):
+    """
+    A class for scraping job data from fieldstar website.
+    """
+    url = 'https://www.fieldstar.ro/j/'  # URL of the careers page
+    url_logo = 'https://www.fieldstar.ro/wp-content/uploads/2020/08/logo-fieldstar-horiz-white-300x73.png'
+    company_name = 'fieldstar'
+
+    def __init__(self):
+        """Initialize the BS4Scraper base with the company name and logo URL."""
+        super().__init__(self.company_name, self.url_logo)
+
+    def get_response(self):
+        """Load the careers page by passing ``url`` to the inherited ``get_content``."""
+        self.get_content(self.url)
+
+    def scrape_jobs(self):
+        """Look up title, link and city elements, store the extracted values, then run ``format_data``."""
+        job_title_elements = self.get_jobs_elements('class_', "awsm-job-post-title")
+        job_link_elements = self.get_jobs_elements('class_', "awsm-job-item")
+
+        self.job_titles = self.get_jobs_details_text(job_title_elements)
+        self.job_links = self.get_jobs_details_href(job_link_elements)
+
+        # Extract the cities as well
+        city_elements = self.get_jobs_elements('class_', "awsm-job-specification-locatie")
+        self.job_cities = self.get_jobs_details_text(city_elements)
+
+        self.format_data()
+
+    def sent_to_future(self):
+        """Call the inherited ``send_to_viitor`` (presumably uploads the jobs; see BS4Scraper)."""
+        self.send_to_viitor()
+
+    def return_data(self):
+        """Fetch and scrape, then return ``(formatted_data, company_name)``."""
+        self.get_response()
+        self.scrape_jobs()
+        return self.formatted_data, self.company_name
+
+    def format_data(self):
+        """Create one job entry per (title, link, city) triple; a 'remote' city becomes 'all'."""
+        # zip() stops at the shortest list, so unmatched titles/links/cities are dropped.
+        for job_title, job_link, job_city in zip(self.job_titles, self.job_links, self.job_cities):
+            if job_city.strip().lower() == 'remote':  # ignore padding around the scraped text
+                job_city = 'all'
+            # NOTE(review): split() turns the text into a list but also breaks
+            # multi-word city names apart - confirm this is intended.
+            self.create_jobs_dict(job_title, job_link, "România", job_city.split())
+
+
+if __name__ == "__main__":
+    fieldstar = fieldstarScraper()
+    fieldstar.get_response()
+    fieldstar.scrape_jobs()
+    fieldstar.sent_to_future()