From b489ca2cfa53fad07f6f919834fdd7b0c0c21004 Mon Sep 17 00:00:00 2001
From: GabrielRezeanu03 <99530668+GabrielRezeanu03@users.noreply.github.com>
Date: Thu, 13 Jun 2024 21:15:40 +0300
Subject: [PATCH] Added 2 new scrapers for Qubiz and Orange

---
 sites/orange.py | 83 +++++++++++++++++++++++++++++++++++++++++++++++++
 sites/qubiz.py  | 71 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 154 insertions(+)
 create mode 100644 sites/orange.py
 create mode 100644 sites/qubiz.py

diff --git a/sites/orange.py b/sites/orange.py
new file mode 100644
index 0000000..68945fb
--- /dev/null
+++ b/sites/orange.py
@@ -0,0 +1,83 @@
+#
+#
+#
+# Orange > https://www.orange.ro/jobs/joburi-disponibile
+
+
+import requests
+import uuid
+from sites.website_scraper_api import WebsiteScraperAPI
+
+class OrangeScraper(WebsiteScraperAPI):
+
+    """
+    A class for scraping job data from Orange website.
+    """
+    url = 'https://www.orange.ro/ux-admin/api/jobs/getJobs?&order=closing_date&direction=desc'
+    url_logo = 'https://www.orange.ro/imagini/orange-logo-static.svg'
+    company_name = 'Orange'
+
+    def __init__(self):
+        """
+        Pass the company name, API url and logo url to the WebsiteScraperAPI initializer.
+        """
+        super().__init__(self.company_name, self.url, self.url_logo)
+
+    def request_headers(self):
+        """
+        Set the request headers.
+        """
+        self.headers = {
+            'Accept': 'application/json'
+        }
+
+    def get_response(self):
+        """
+        Send a GET request and retrieve the response.
+        """
+        self.job_details = requests.get(
+            self.url,
+            headers=self.headers).json()
+        self.get_jobs_response(self.job_details)
+
+    def scrape_jobs(self):
+        """
+        Scrape job data from Orange website.
+        """
+        self.job_titles = self.get_job_details(['title'])
+        # The first entry of each job's 'location' value supplies the city name.
+        self.job_cities = [job_city[0]['name'] for job_city in self.get_job_details(['location'])]
+        self.job_urls = self.get_job_details(['url'])
+
+        self.format_data()
+
+    def format_data(self):
+        """
+        Iterate over all job details and send to the create jobs dictionary.
+        """
+        job_country = 'Romania'
+        # All three lists must be zipped: the loop unpacks three values per job.
+        for job_title, job_url, job_city in zip(self.job_titles, self.job_urls, self.job_cities):
+            self.create_jobs_dict(job_title, job_url, job_country, job_city)
+
+    def sent_to_future(self):
+        """
+        Publish the scraped jobs by delegating to send_to_viitor.
+        """
+        self.send_to_viitor()
+
+    def return_data(self):
+        """
+        Run the full scrape and return the formatted data and the company name.
+        """
+        self.request_headers()
+        self.get_response()
+        self.scrape_jobs()
+        return self.formatted_data, self.company_name
+
+if __name__ == "__main__":
+    Orange = OrangeScraper()
+    Orange.request_headers()
+    Orange.get_response()
+    Orange.scrape_jobs()
+    Orange.send_to_viitor()
diff --git a/sites/qubiz.py b/sites/qubiz.py
new file mode 100644
index 0000000..69c8f71
--- /dev/null
+++ b/sites/qubiz.py
@@ -0,0 +1,71 @@
+#
+#
+#
+#
+## Qubiz > https://www.qubiz.com/jobs
+
+from sites.website_scraper_bs4 import BS4Scraper
+
+class QubizScraper(BS4Scraper):
+
+    """
+    A class for scraping job data from Qubiz website.
+    """
+    url = 'https://www.qubiz.com/jobs'
+    url_logo = 'https://assets-global.website-files.com/603e16fd5761f8f7787bf39a/64491d149607dd73d0e80235_LogoWebsiteAnniversary.svg'
+    company_name = 'Qubiz'
+
+    def __init__(self):
+        """
+        Initialize the BS4Scraper class.
+        """
+        super().__init__(self.company_name, self.url_logo)
+
+    def get_response(self):
+        """
+        Load the jobs page through the base class get_content helper.
+        """
+        self.get_content(self.url)
+
+    def scrape_jobs(self):
+        """
+        Scrape job data from Qubiz website.
+        """
+
+        job_titles_elements = self.get_jobs_elements('class_', 'head-2-job-title')
+        job_urls_elements = self.get_jobs_elements('class_', 'button-blue---job-openings w-button')
+        job_cities_elements = self.get_jobs_elements('class_', 'text-block-12')
+
+        self.job_titles = self.get_jobs_details_text(job_titles_elements)
+        self.job_urls = self.get_jobs_details_href(job_urls_elements)
+        self.job_cities = self.get_jobs_details_text(job_cities_elements)
+
+        self.format_data()
+
+    def sent_to_future(self):
+        """
+        Publish the scraped jobs by delegating to send_to_viitor.
+        """
+        self.send_to_viitor()
+
+    def return_data(self):
+        """
+        Run the full scrape and return the formatted data and the company name.
+        """
+        self.get_response()
+        self.scrape_jobs()
+        return self.formatted_data, self.company_name
+
+    def format_data(self):
+        """
+        Iterate over all job details and send to the create jobs dictionary.
+        """
+        job_country = "Romania"
+        for job_title, job_url, job_city in zip(self.job_titles, self.job_urls, self.job_cities):
+            self.create_jobs_dict(job_title, job_url, job_country, job_city, remote='on-site', county=None)
+
+if __name__ == "__main__":
+    Qubiz = QubizScraper()
+    Qubiz.get_response()
+    Qubiz.scrape_jobs()
+    # Qubiz.sent_to_future()