-
Notifications
You must be signed in to change notification settings - Fork 0
/
HH_parser.py
27 lines (25 loc) · 1.17 KB
/
HH_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# hh.ru vacancy-search URL: area=1 (Moscow), search_period=3 (last 3 days),
# text=python (keyword), page=0 (first results page only — no pagination).
base_url = 'https://hh.ru/search/vacancy?area=1&search_period=3&text=python&page=0'
def hh_parse(base_url, headers):
    """Scrape vacancy cards from one hh.ru search-results page.

    Args:
        base_url: Full search URL with query parameters already encoded.
        headers: HTTP headers dict for the request (presumably carries a
            browser-like User-Agent so hh.ru serves the page — confirm
            against the definition earlier in the file).

    Returns:
        list[dict]: One dict per vacancy with keys 'title', 'href',
        'company', 'content'. Empty list when the HTTP request fails
        or no vacancy cards are found.
    """
    jobs = []
    session = requests.Session()
    request = session.get(base_url, headers=headers)
    if request.status_code == 200:
        soup = bs(request.content, 'html.parser')
        divs = soup.find_all('div', attrs={'data-qa': 'vacancy-serp__vacancy'})
        for div in divs:
            # Look the title anchor up once (original did two identical
            # finds) and skip cards without one instead of crashing.
            title_tag = div.find('a', attrs={'data-qa': 'vacancy-serp__vacancy-title'})
            if title_tag is None:
                continue
            title = title_tag.text
            href = title_tag['href']
            # Company and snippet blocks are optional on a card; the
            # original raised AttributeError on `.text` when one was
            # missing. Fall back to empty strings instead.
            company_tag = div.find('a', attrs={'data-qa': 'vacancy-serp__vacancy-employer'})
            company = company_tag.text if company_tag is not None else ''
            text1_tag = div.find('div', attrs={'data-qa': 'vacancy-serp__vacancy_snippet_responsibility'})
            text1 = text1_tag.text if text1_tag is not None else ''
            text2_tag = div.find('div', attrs={'data-qa': 'vacancy-serp__vacancy_snippet_requirement'})
            text2 = text2_tag.text if text2_tag is not None else ''
            content = text1 + ' ' + text2
            jobs.append({
                'title': title,
                'href': href,
                'company': company,
                'content': content
            })
        print(jobs)
    else:
        print('ERROR')
    # Return the collected vacancies so callers can use them; the
    # original only printed and implicitly returned None.
    return jobs
# Runs the scrape at import/execution time. `headers` is defined earlier
# in the file (not visible in this chunk) — presumably a dict with a
# User-Agent string; verify against the top of the file.
hh_parse(base_url, headers)