diff --git a/Code/Scrapper/scrapper_linkedIn.py b/Code/Scrapper/scrapper_linkedIn.py
index b649ccf..d05f621 100644
--- a/Code/Scrapper/scrapper_linkedIn.py
+++ b/Code/Scrapper/scrapper_linkedIn.py
@@ -12,15 +12,18 @@ import requests
+# get_job_description fetches job postings for the given role and location and matches them against the resume skills using match_threshold
 def get_job_description(connection,resume_skills,all_skills, match_threshold, role, location, no_of_jobs_to_retrieve, data):
     # role = role.replace(' ', '%20')
+    # Build the LinkedIn job-search URL for the given role, location and filters
     url = "https://www.linkedin.com/jobs/jobs-in-"+location+"?keywords="+role+"&f_JT=F%2CP&f_E=1%2C3&position=1&pageNum=0"
+    # Cap the number of jobs to retrieve
     limit = no_of_jobs_to_retrieve
     k1 = requests.get(url)
-
+    # Parse the response HTML with BeautifulSoup
     soup1 = BeautifulSoup(k1.content, 'html.parser')
     string1 = soup1.find_all("a",{"class":"base-card__full-link"})
@@ -29,7 +32,9 @@ def get_job_description(connection,resume_skills,all_skills, match_threshold, ro
     for i in range(len(string1)):
         if role.lower() in string1[i].get_text().lower() and limit>0:
             dictionary = {}
+            # Store the job title after cleaning up whitespace in the anchor text
            dictionary["Job Title"] = string1[i].get_text().replace('\n',' ').replace(' ','')
+            # Store the posting's href as the job link
            dictionary["Job Link"] = string1[i]['href']
            limit-=1
            k = requests.get(string1[i]['href']).text
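For context, below is a minimal, standalone sketch of the scraping step that the commented lines above describe: build the search URL, fetch it with requests, parse the page with BeautifulSoup, and collect matching job titles and links. `fetch_job_links` is a hypothetical helper written only for illustration, not part of the repository; it assumes the same LinkedIn URL pattern and the `base-card__full-link` anchor class used in `get_job_description`, both of which LinkedIn may change at any time.

```python
import requests
from bs4 import BeautifulSoup


def fetch_job_links(role, location, limit=5):
    """Return up to `limit` (title, link) pairs whose title mentions `role`."""
    # Same URL pattern as get_job_description: the job-type (f_JT) and
    # experience-level (f_E) filters are hard-coded in the query string.
    url = ("https://www.linkedin.com/jobs/jobs-in-" + location
           + "?keywords=" + role
           + "&f_JT=F%2CP&f_E=1%2C3&position=1&pageNum=0")
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")

    jobs = []
    # Each job card on the search-results page is an <a> tag with this class.
    for anchor in soup.find_all("a", {"class": "base-card__full-link"}):
        title = " ".join(anchor.get_text().split())  # collapse newlines and spaces
        if role.lower() in title.lower():
            jobs.append((title, anchor["href"]))
            if len(jobs) >= limit:
                break
    return jobs


if __name__ == "__main__":
    for title, link in fetch_job_links("Data Analyst", "India", limit=3):
        print(title, "->", link)
```

The sketch stops at collecting titles and links; the function in the diff goes one step further and fetches each job's page (`requests.get(string1[i]['href']).text`) so the posting's description can be compared against the resume skills.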