Merge branch 'main' of https://github.com/peviitor-ro/JobsScrapers

peviitor-ro · Jul 7, 2024 · 7fc162c · 7fc162c
2 parents eb7ff3f + 805eab5
commit 7fc162c
Show file tree

Hide file tree

Showing 3 changed files with 207 additions and 0 deletions.
diff --git a/sites/orange.py b/sites/orange.py
@@ -0,0 +1,74 @@
+#
+#
+#
+# Orange > https://www.orange.ro/jobs/joburi-disponibile
+
+
+import requests
+import uuid
+from sites.website_scraper_api import WebsiteScraperAPI
+
+class OrangeScraper(WebsiteScraperAPI):
+    """
+    A class for scraping job data from the Orange website.
+
+    The jobs are requested as JSON from the endpoint stored in ``url``.
+    """
+
+    url = 'https://www.orange.ro/ux-admin/api/jobs/getJobs?&order=closing_date&direction=desc'
+    url_logo = 'https://www.orange.ro/imagini/orange-logo-static.svg'
+    company_name = 'Orange'
+
+    def __init__(self):
+        """
+        Pass the company name, the API url and the logo url to the parent class.
+        """
+        super().__init__(self.company_name, self.url, self.url_logo)
+
+    def request_headers(self):
+        """
+        Set the request headers.
+        """
+        self.headers = {
+            'Accept': 'application/json'
+        }
+
+    def get_response(self):
+        """
+        Send a GET request, decode the JSON body and hand it to the parent.
+
+        Call ``request_headers`` first: within this class ``self.headers``
+        is only assigned there.
+        """
+        self.job_details = requests.get(
+            self.url,
+            headers=self.headers).json()
+        self.get_jobs_response(self.job_details)
+
+    def scrape_jobs(self):
+        """
+        Scrape job data from Orange website.
+
+        Collects the titles, cities and urls of the fetched jobs and then
+        formats them through ``format_data``.
+        """
+        self.job_titles = self.get_job_details(['title'])
+        # Each 'location' value is indexed as a sequence of mappings; only the
+        # 'name' of its first entry is kept as the job city.
+        self.job_cities = [job_city[0]['name'] for job_city in self.get_job_details(['location'])]
+        self.job_urls = self.get_job_details(['url'])
+
+        self.format_data()
+
+    def format_data(self):
+        """
+        Iterate over all job details and send them to the create jobs dictionary.
+        """
+        job_country = 'Romania'
+        # All three lists must be zipped together: zipping only titles and urls
+        # while unpacking three names raises ValueError on the first job.
+        for job_title, job_url, job_city in zip(self.job_titles, self.job_urls, self.job_cities):
+            self.create_jobs_dict(job_title, job_url, job_country, job_city)
+
+    def sent_to_future(self):
+        """
+        Forward the call to the parent's ``send_to_viitor``.
+        """
+        self.send_to_viitor()
+
+    def return_data(self):
+        """
+        Run the whole scraping flow and return the results.
+
+        Returns a tuple ``(self.formatted_data, self.company_name)``.
+        """
+        self.request_headers()
+        self.get_response()
+        self.scrape_jobs()
+        return self.formatted_data, self.company_name
+
+if __name__ == "__main__":
+    # Script entry point: fetch the Orange jobs, format them and hand the
+    # result to send_to_viitor.
+    scraper = OrangeScraper()
+    scraper.request_headers()
+    scraper.get_response()
+    scraper.scrape_jobs()
+    scraper.send_to_viitor()
diff --git a/sites/qubiz.py b/sites/qubiz.py
@@ -0,0 +1,65 @@
+#
+#
+#
+#
+## Qubiz > https://www.qubiz.com/jobs
+
+from sites.website_scraper_bs4 import BS4Scraper
+
+class QubizScraper(BS4Scraper):
+    """
+    Scraper for the job openings listed on the Qubiz website.
+    """
+
+    url = 'https://www.qubiz.com/jobs'
+    url_logo = 'https://assets-global.website-files.com/603e16fd5761f8f7787bf39a/64491d149607dd73d0e80235_LogoWebsiteAnniversary.svg'
+    company_name = 'Qubiz'
+
+    def __init__(self):
+        """
+        Hand the company name and the logo url over to the parent class.
+        """
+        super().__init__(self.company_name, self.url_logo)
+
+    def get_response(self):
+        """
+        Load the jobs page through the parent's ``get_content``.
+        """
+        self.get_content(self.url)
+
+    def scrape_jobs(self):
+        """
+        Collect titles, urls and cities of the Qubiz jobs, then format them.
+        """
+        # Look up the elements first, in the same order as the details below.
+        title_nodes = self.get_jobs_elements('class_', 'head-2-job-title')
+        link_nodes = self.get_jobs_elements('class_', 'button-blue---job-openings w-button')
+        city_nodes = self.get_jobs_elements('class_', 'text-block-12')
+
+        self.job_titles = self.get_jobs_details_text(title_nodes)
+        self.job_urls = self.get_jobs_details_href(link_nodes)
+        self.job_cities = self.get_jobs_details_text(city_nodes)
+
+        self.format_data()
+
+    def format_data(self):
+        """
+        Register every scraped job through ``create_jobs_dict``.
+        """
+        job_country = "Romania"
+        # zip pairs the three lists position by position and stops at the
+        # shortest one.
+        jobs = zip(self.job_titles, self.job_urls, self.job_cities)
+        for title, link, city in jobs:
+            self.create_jobs_dict(title, link, job_country, city, remote='on-site', county=None)
+
+    def sent_to_future(self):
+        """
+        Forward the call to the parent's ``send_to_viitor``.
+        """
+        self.send_to_viitor()
+
+    def return_data(self):
+        """
+        Run the scraping flow and return ``(formatted_data, company_name)``.
+        """
+        self.get_response()
+        self.scrape_jobs()
+        return self.formatted_data, self.company_name
+
+if __name__ == "__main__":
+    # Script entry point: load the Qubiz page, scrape it and send the result.
+    scraper = QubizScraper()
+    scraper.get_response()
+    scraper.scrape_jobs()
+    scraper.sent_to_future()
+
+
+
diff --git a/sites/softlead.py b/sites/softlead.py
@@ -0,0 +1,68 @@
+#
+#
+#
+#
+## Softlead > https://softlead.ro/cariere
+
+from sites.website_scraper_bs4 import BS4Scraper
+
+class SoftleadScraper(BS4Scraper):
+    """
+    A class for scraping job data from Softlead website.
+    """
+
+    url = 'https://softlead.ro/cariere'
+    url_logo = 'https://softlead.ro/frontendAssets/images/logo.webp'
+    company_name = 'Softlead'
+    base_url = 'https://softlead.ro'  # Base against which job links are resolved
+
+    def __init__(self):
+        """
+        Pass the company name and the logo url to the parent class.
+        """
+        super().__init__(self.company_name, self.url_logo)
+
+    def get_response(self):
+        """
+        Load the careers page through the parent's ``get_content``.
+        """
+        self.get_content(self.url)
+
+    def scrape_jobs(self):
+        """
+        Scrape job data from Softlead website.
+
+        Collects the job titles and job links, turns the links into absolute
+        urls and then formats the jobs through ``format_data``.
+        """
+        # Imported here so the method does not rely on the module import block.
+        from urllib.parse import urljoin
+
+        job_titles_elements = self.get_jobs_elements('class_', 'blog-title')
+        job_urls_elements = self.get_jobs_elements('class_', 'readmore')
+
+        self.job_titles = self.get_jobs_details_text(job_titles_elements)
+        self.job_urls = self.get_jobs_details_href(job_urls_elements)
+
+        # Resolve every href against the base url. Unlike plain string
+        # concatenation, urljoin leaves an already absolute href untouched and
+        # inserts the missing slash for a relative href without a leading one;
+        # root-relative hrefs ("/...") give the same result as before.
+        self.job_urls = [urljoin(self.base_url, job_url) for job_url in self.job_urls]
+
+        self.format_data()
+
+    def sent_to_future(self):
+        """
+        Forward the call to the parent's ``send_to_viitor``.
+        """
+        self.send_to_viitor()
+
+    def return_data(self):
+        """
+        Run the scraping flow and return ``(formatted_data, company_name)``.
+        """
+        self.get_response()
+        self.scrape_jobs()
+        return self.formatted_data, self.company_name
+
+    def format_data(self):
+        """
+        Iterate over all job details and send to the create jobs dictionary.
+
+        Every job is registered with the same fixed country and city values.
+        """
+        for job_title, job_url in zip(self.job_titles, self.job_urls):
+            self.create_jobs_dict(job_title, job_url, "România", "Bucuresti")
+
+
+if __name__ == "__main__":
+    # Script entry point: load the Softlead page, scrape it and send the result.
+    scraper = SoftleadScraper()
+    scraper.get_response()
+    scraper.scrape_jobs()
+    scraper.sent_to_future()
+
+
+