diff --git a/Code/Database/schema/srijas.sql b/Code/Database/schema/srijas.sql index e2972fd..855571c 100644 --- a/Code/Database/schema/srijas.sql +++ b/Code/Database/schema/srijas.sql @@ -181,7 +181,7 @@ INSERT INTO `skill_master` (`skill_id`, `skill_title`, `is_active`, `created_by` (50, 'Hadoop', b'1', -1, '2021-11-01 04:44:31', 0, '2021-11-01 04:44:31'), (51, 'Kafka', b'1', -1, '2021-11-01 04:44:31', 0, '2021-11-01 04:44:31'), (52, 'Cassandra', b'1', -1, '2021-11-01 04:44:31', 0, '2021-11-01 04:44:31'), -(53, 'Elasticeearch', b'1', -1, '2021-11-01 04:44:31', 0, '2021-11-01 04:44:31'); +(53, 'Elasticsearch', b'1', -1, '2021-11-01 04:44:31', 0, '2021-11-01 04:44:31'); -- -------------------------------------------------------- diff --git a/Code/Scrapper/Scrapper_main.py b/Code/Scrapper/Scrapper_main.py index e0ef6f7..4630b90 100644 --- a/Code/Scrapper/Scrapper_main.py +++ b/Code/Scrapper/Scrapper_main.py @@ -48,21 +48,21 @@ def get_location(connection): cursor=connection.cursor() cursor.execute(sql_select_query) records2=cursor.fetchall() - return records2[0] + return records2[-1][0] def get_threshold(connection): sql_select_query = "select user_threshold from user_master um join user_resume ur where um.user_id=ur.user_id" cursor=connection.cursor() cursor.execute(sql_select_query) records2=cursor.fetchall() - return records2[0] + return records2[-1][0] def get_role(connection): sql_select_query = "select job_title from job_master jm join user_master um where jm.job_id=um.user_preferred_job_id" cursor=connection.cursor() cursor.execute(sql_select_query) records2=cursor.fetchall() - return records2[0] + return records2[-1][0] def get_resume_skills(connection): sql_select_Query2="select resume_id,skill_id from resume_skills where is_active=1" @@ -102,16 +102,18 @@ def get_emailing_list(connection): #print(resume_skills) email_id_list = get_emailing_list(connection) # print(email_list) - location = get_location(connection) - role = get_role(connection) + location = 
str(get_location(connection)) + role = str(get_role(connection)) + print(role) no_of_jobs_to_retrieve = 5 - match_threshold = get_threshold(connection) - # final_result_linkedIn = sl.get_job_description(resume_skills,all_skills, match_threshold, role, location, no_of_jobs_to_retrieve, data) - final_result_glassdoor = sg.get_job_description(resume_skills,all_skills, match_threshold, role, location, no_of_jobs_to_retrieve, data) - # final_result_indeed = si.get_job_description(resume_skills,all_skills, match_threshold, role, location, no_of_jobs_to_retrieve, data) - - # final_results = final_result_linkedIn + final_result_glassdoor + final_result_indeed + match_threshold = int(get_threshold(connection)) + # role_name_linkedIn, final_result_linkedIn = sl.get_job_description(resume_skills,all_skills, match_threshold, role, location, no_of_jobs_to_retrieve, data) + job_role, final_result_glassdoor = sg.get_job_description(resume_skills,all_skills, match_threshold, role, location, no_of_jobs_to_retrieve, data) + # role_name_indeed, final_result_indeed = si.get_job_description(resume_skills,all_skills, match_threshold, role, location, no_of_jobs_to_retrieve, data) print(final_result_glassdoor) - ea.sendmail(final_result_glassdoor,email_id_list) + # final_results = final_result_linkedIn + final_result_glassdoor + final_result_indeed + # role_name = role_name_linkedIn + role_name_glassdoor + role_name_indeed + + # ea.sendmail(final_result_linkedIn,email_id_list,role_name_linkedIn) diff --git a/Code/Scrapper/email_alert.py b/Code/Scrapper/email_alert.py index 9496c4f..4237ad0 100644 --- a/Code/Scrapper/email_alert.py +++ b/Code/Scrapper/email_alert.py @@ -4,7 +4,7 @@ from socket import gaierror import json -def sendmail(final_result,email_id_list): +def sendmail(final_result,email_id_list, job_role): port = 587 smtp_server = "smtp.gmail.com" login = "srijas.alerts@gmail.com" @@ -29,8 +29,8 @@ def sendmail(final_result,email_id_list): print(link) pre = """View Position""" - 
temp_str += (str(counter) + ". " + pre + embedded_link + post+ '\n') + post = """'>Click here</a>""" + temp_str += (str(counter) + ". " + job_role[counter-1] + ': ' + pre + embedded_link + post+ '\n') counter += 1 body += temp_str msg.attach(MIMEText(body, 'html')) diff --git a/Code/Scrapper/scrapper_glassdoor.py b/Code/Scrapper/scrapper_glassdoor.py index 8b600f4..7eea179 100644 --- a/Code/Scrapper/scrapper_glassdoor.py +++ b/Code/Scrapper/scrapper_glassdoor.py @@ -14,8 +14,8 @@ def get_job_description(resume_skills,all_skills, match_threshold, role, location, no_of_jobs_to_retrieve, data): options = Options() - options.add_argument("--window-size-1920,1200") - options.add_argument('--headless') + # options.add_argument("--window-size-1920,1200") + options.headless = True options.add_argument('--no-sandbox') options.add_argument('--disable-dev-shm-usage') driver = webdriver.Chrome (options=options,executable_path=ChromeDriverManager().install()) @@ -24,7 +24,7 @@ def get_job_description(resume_skills,all_skills, match_threshold, role, locatio job_urls = [] c=0 job_buttons = driver.find_elements_by_xpath('.//a[@class = "jobLink job-search-key-1rd3saf eigr9kq1"]') #jl for Job Listing. These are the buttons we're going to click. 
- # time.sleep(2) + time.sleep(2) print(len(job_buttons)) for text in job_buttons: if text.get_attribute('href'): ### get all the job postings URL's @@ -36,8 +36,9 @@ def get_job_description(resume_skills,all_skills, match_threshold, role, locatio final_dict = {} # ========== Iterate through each url and get the job description ================================= - + job_role = [] for i in job_urls: + time.sleep(5) jobs = [] driver.get(i) button = driver.find_element_by_xpath('//*[@id="JobDescriptionContainer"]/div[2]') @@ -45,8 +46,11 @@ def get_job_description(resume_skills,all_skills, match_threshold, role, locatio job_description = driver.find_element_by_xpath('//*[@id="JobDescriptionContainer"]/div[1]').text jobs.append(job_description) final_dict[i] = job_description + job_title=driver.find_element_by_xpath("//div[@class='css-17x2pwl e11nt52q6']").text + company_details=driver.find_element_by_xpath("//div[@class='css-16nw49e e11nt52q1']").text + job_role.append(job_title + ' - ' + company_details) final_result = ke.get_user_id_to_list_of_job_ids(resume_skills,final_dict,all_skills,match_threshold) - return final_result + return job_role, final_result diff --git a/Code/Scrapper/scrapper_linkedIn.py b/Code/Scrapper/scrapper_linkedIn.py index 31f91ef..c43631d 100644 --- a/Code/Scrapper/scrapper_linkedIn.py +++ b/Code/Scrapper/scrapper_linkedIn.py @@ -18,17 +18,20 @@ def get_job_description(resume_skills,all_skills, match_threshold, role, locatio # role = role.replace(' ', '%20') #Form a dynamic URL to fetch the details using Beautiful soup for the given filters url = "https://www.linkedin.com/jobs/jobs-in-"+location+"?keywords="+role+"&f_JT=F%2CP&f_E=1%2C3&position=1&pageNum=0" - + url = url.replace(' ', '%20') + print(url) + # Add number of jobs to retrieve to limit limit = no_of_jobs_to_retrieve k1 = requests.get(url) # Run the beautiful soup soup1 = BeautifulSoup(k1.content, 'html.parser') - + print(soup1) string1 = soup1.find_all("a",{"class":"base-card__full-link"}) 
print(string1) description_dict = {} + job_role = [] for i in range(len(string1)): if role.lower() in string1[i].get_text().lower() and limit>0: dictionary = {} @@ -37,6 +40,7 @@ def get_job_description(resume_skills,all_skills, match_threshold, role, locatio #Replace the next line parameter with the blank space #Iterate the different job suggestions according to the given filters and fetch description for the jobs matching the search criteria of the user given. dictionary["Job Link"] = string1[i]['href'] + job_role.append(dictionary["Job Title"]) limit-=1 k = requests.get(string1[i]['href']).text soup=BeautifulSoup(k,'html.parser') @@ -48,4 +52,4 @@ def get_job_description(resume_skills,all_skills, match_threshold, role, locatio final_result=ke.get_user_id_to_list_of_job_ids(resume_skills,description_dict,all_skills,match_threshold) - return final_result + return job_role, final_result