Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added two new scraper scripts for Alumil and Fieldstar #226

Merged
merged 1 commit into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions sites/alumil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from sites.website_scraper_bs4 import BS4Scraper


class alumilScraper(BS4Scraper):
    """
    Scraper for the job openings listed on the Alumil Romania careers page.

    Builds on BS4Scraper: the page is fetched with ``get_content``, elements
    are located with ``get_jobs_elements`` and every (title, link) pair is
    handed to ``create_jobs_dict``.
    """
    # Careers page that lists the open positions.
    url = "https://www.alumil.com/romania/corporate/careers/job-openings"
    # Logo passed to the base-class initializer together with the name.
    url_logo = "https://www.alumil.com/ResourcePackages/Alumil/assets/dist/images/logo.svg"
    company_name = "alumil"

    def __init__(self):
        """
        Hand the company name and logo URL to the BS4Scraper initializer.
        """
        super().__init__(self.company_name, self.url_logo)

    def get_response(self):
        """
        Load the careers page through the inherited ``get_content``.
        """
        self.get_content(self.url)

    def scrape_jobs(self):
        """
        Collect job titles and links from the loaded page, then format them.

        Titles are read from elements with class ``article-title``; links
        come from the anchors matched by the arrow-column CSS selector.
        """
        title_nodes = self.get_jobs_elements("class_", "article-title")
        link_nodes = self.get_jobs_elements("css_", "div.col-xs-1.article-arrow > a")

        self.job_titles = self.get_jobs_details_text(title_nodes)
        self.job_links = self.get_jobs_details_href(link_nodes)

        self.format_data()

    def sent_to_future(self):
        """
        Delegate to the inherited ``send_to_viitor``.
        """
        self.send_to_viitor()

    def return_data(self):
        """
        Fetch the page, scrape it and return ``(formatted_data, company_name)``.
        """
        self.get_response()
        self.scrape_jobs()
        return self.formatted_data, self.company_name

    def format_data(self):
        """
        Pass every scraped (title, link) pair to ``create_jobs_dict``.

        Titles and links are paired positionally with ``zip``, so iteration
        stops at the shorter of the two lists.
        """
        for title, link in zip(self.job_titles, self.job_links):
            # The company's city is hard-coded to București for every job.
            self.create_jobs_dict(title, link, "România", ["București"])


if __name__ == "__main__":
    # Run as a script: fetch the page, scrape it, then publish the results.
    scraper = alumilScraper()
    scraper.get_response()
    scraper.scrape_jobs()
    scraper.sent_to_future()
59 changes: 59 additions & 0 deletions sites/fieldstar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from sites.website_scraper_bs4 import BS4Scraper


class fieldstarScraper(BS4Scraper):
    """
    Scraper for the job openings listed on the Fieldstar careers page.

    Builds on BS4Scraper: the page is fetched with ``get_content``, elements
    are located with ``get_jobs_elements`` and every (title, link, city)
    triple is handed to ``create_jobs_dict``.
    """
    # Careers page that lists the open positions.
    url = "https://www.fieldstar.ro/j/"
    # Logo passed to the base-class initializer together with the name.
    url_logo = "https://www.fieldstar.ro/wp-content/uploads/2020/08/logo-fieldstar-horiz-white-300x73.png"
    company_name = "fieldstar"

    def __init__(self):
        """
        Hand the company name and logo URL to the BS4Scraper initializer.
        """
        super().__init__(self.company_name, self.url_logo)

    def get_response(self):
        """
        Load the careers page through the inherited ``get_content``.
        """
        self.get_content(self.url)

    def scrape_jobs(self):
        """
        Collect job titles, links and cities from the loaded page, then
        format them.
        """
        title_nodes = self.get_jobs_elements("class_", "awsm-job-post-title")
        link_nodes = self.get_jobs_elements("class_", "awsm-job-item")

        self.job_titles = self.get_jobs_details_text(title_nodes)
        self.job_links = self.get_jobs_details_href(link_nodes)

        # The cities are extracted as well.
        city_nodes = self.get_jobs_elements("class_", "awsm-job-specification-locatie")
        self.job_cities = self.get_jobs_details_text(city_nodes)

        self.format_data()

    def sent_to_future(self):
        """
        Delegate to the inherited ``send_to_viitor``.
        """
        self.send_to_viitor()

    def return_data(self):
        """
        Fetch the page, scrape it and return ``(formatted_data, company_name)``.
        """
        self.get_response()
        self.scrape_jobs()
        return self.formatted_data, self.company_name

    def format_data(self):
        """
        Pass every scraped (title, link, city) triple to ``create_jobs_dict``.

        The three lists are paired positionally with ``zip``, so iteration
        stops at the shortest one. A city text equal to "remote" (compared
        case-insensitively) is replaced by "all".
        """
        for title, link, city in zip(self.job_titles, self.job_links, self.job_cities):
            location = "all" if city.lower() == "remote" else city
            # The location text is split on whitespace into a list.
            # NOTE(review): a multi-word city name would end up as several
            # entries - confirm against the page's actual location text.
            self.create_jobs_dict(title, link, "România", location.split())


if __name__ == "__main__":
    # Run as a script: fetch the page, scrape it, then publish the results.
    scraper = fieldstarScraper()
    scraper.get_response()
    scraper.scrape_jobs()
    scraper.sent_to_future()
Loading