Formatted with black
Re-st authored and github-actions[bot] committed Nov 27, 2023
1 parent 693e5ac commit 89bd06d
Showing 5 changed files with 66 additions and 28 deletions.
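Every hunk below is mechanical: black splits definitions and call sites that overflow its default 88-character line length, normalizes spacing around "=", rewrites single-quoted strings as double-quoted, and enforces two blank lines between top-level definitions. A minimal sketch of reproducing one of these rewrites with black's Python API (documented as unstable; the github-actions[bot] workflow presumably just runs the black CLI over the repo):

import black  # assumes black is installed: pip install black

# The old one-line signature from basic.py, as a string.
src = (
    "def goto_profilesite(profile, wrapper_element, wrapper_class_, "
    "wrapper_txt, url, inner_euckr=False):\n"
    "    pass\n"
)
# format_str applies the same default 88-column rules that produced this commit.
print(black.format_str(src, mode=black.Mode()))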
19 changes: 15 additions & 4 deletions scrap/local_councils/basic.py
@@ -151,7 +151,9 @@ def extract_party(string):
    return None


-def goto_profilesite(profile, wrapper_element, wrapper_class_, wrapper_txt, url, inner_euckr=False):
+def goto_profilesite(
+    profile, wrapper_element, wrapper_class_, wrapper_txt, url, inner_euckr=False
+):
    # Fetch the "view profile" link from the councilor's profile page
    parsed_url = urlparse(url)
    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
@@ -224,7 +226,9 @@ def getpty(profile, element, class_, wrapper_element, wrapper_class_, wrapper_txt
    raise Exception("[basic.py] 정당 정보 파싱 불가")


-def getpty_easy(profile, wrapper_element, wrapper_class_, wrapper_txt, url, inner_euckr=False):
+def getpty_easy(
+    profile, wrapper_element, wrapper_class_, wrapper_txt, url, inner_euckr=False
+):
    # Fetch the name of the party the councilor belongs to from the profile
    if wrapper_element is not None:
        profile = goto_profilesite(
@@ -253,7 +257,9 @@ def sel_getpty_easy(
    return party


-def scrap_basic(url, cid, args: ScrapBasicArgument, encoding="utf-8", inner_euckr=False) -> ScrapResult:
+def scrap_basic(
+    url, cid, args: ScrapBasicArgument, encoding="utf-8", inner_euckr=False
+) -> ScrapResult:
    """Scrape a councilor's detailed biography
    :param url: url of the councilor list page
    :param cid: council id
@@ -293,7 +299,12 @@ def scrap_basic(url, cid, args: ScrapBasicArgument, encoding="utf-8", inner_euckr=False) -> ScrapResult:
    except Exception as e:
        try:
            party = getpty_easy(
-                profile, args.pty_wrapelt, args.pty_wrapcls, args.pty_wraptxt, url, inner_euckr
+                profile,
+                args.pty_wrapelt,
+                args.pty_wrapcls,
+                args.pty_wraptxt,
+                url,
+                inner_euckr,
            )
        except Exception:
            raise RuntimeError("[basic.py] 의원 정당을 가져오는데 실패했습니다. 이유: " + str(e))
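A side note on the hunk above: the inner except swallows its own exception and reports only the outer one (e). Not part of this commit, but a sketch of how the same fallback could preserve both failures with explicit chaining:

# Hypothetical variant of the fallback in scrap_basic (names taken from the hunk):
try:
    party = getpty_easy(
        profile, args.pty_wrapelt, args.pty_wrapcls, args.pty_wraptxt, url, inner_euckr
    )
except Exception as inner:
    # "raise ... from inner" keeps the getpty_easy traceback attached to the report of e
    raise RuntimeError(f"[basic.py] failed to fetch councilor's party: {e}") from inner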
33 changes: 20 additions & 13 deletions scrap/local_councils/gyeongsang.py
@@ -13,6 +13,7 @@
party_keywords = getPartyList()
party_keywords.append("무소속")

+
def scrap_186(
    url,
    cid,
@@ -124,6 +125,7 @@ def scrap_191(

    return ret_local_councilors(cid, councilors)

+
def scrap_192(
    url,
    cid,
@@ -142,10 +144,10 @@
        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
        profile_url = base_url + profile_link["href"]
        profile = get_soup(profile_url, verify=False, encoding="euc-kr")
-        party=""
+        party = ""
        for keyword in party_keywords:
            if keyword in profile.text:
-                party=keyword
+                party = keyword
                break
        councilors.append(Councilor(name=name, jdName=party))

@@ -225,11 +227,13 @@ def scrap_197(
    """Gyeongsan-si, Gyeongsangbuk-do"""
    soup = get_soup(url, verify=False, encoding="euc-kr")
    councilors: List[Councilor] = []
-    for profile in soup.find_all('div', class_='memberL') + soup.find_all('div', class_='memberR'):
-        party = profile.find_previous('h4', class_='title').text.strip()
-        assert(party in party_keywords)
-        name = profile.find('dt').text.strip()
-
+    for profile in soup.find_all("div", class_="memberL") + soup.find_all(
+        "div", class_="memberR"
+    ):
+        party = profile.find_previous("h4", class_="title").text.strip()
+        assert party in party_keywords
+        name = profile.find("dt").text.strip()
+
        councilors.append(Councilor(name=name, jdName=party))

    return ret_local_councilors(cid, councilors)
@@ -323,15 +327,16 @@ def scrap_202(
        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
        profile_url = base_url + link
        profile = get_soup(profile_url, verify=False, encoding="euc-kr")
-        party=""
+        party = ""
        for keyword in party_keywords:
            if keyword in profile.text:
-                party=keyword
+                party = keyword
                break
        councilors.append(Councilor(name=name, jdName=party))

    return ret_local_councilors(cid, councilors)

+
def scrap_203(
    url,
    cid,
@@ -353,6 +358,7 @@

    return ret_local_councilors(cid, councilors)

+
def scrap_204(
    url,
    cid,
@@ -369,19 +375,20 @@
        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
        profile_url = base_url + link
        profile = get_soup(profile_url, verify=False)
-        link = profile.find('a', text='의원소개', href=True)
-        profile_url = base_url + link['href']
+        link = profile.find("a", text="의원소개", href=True)
+        profile_url = base_url + link["href"]
        profile = get_soup(profile_url, verify=False)

-        party=""
+        party = ""
        for keyword in party_keywords:
            if keyword in profile.text:
-                party=keyword
+                party = keyword
                break
        councilors.append(Councilor(name=name, jdName=party))

    return ret_local_councilors(cid, councilors)

+
def scrap_206(
    url,
    cid,
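The fetch-profile-then-scan-keywords block that black just reindented appears verbatim in scrap_192, scrap_202, and scrap_204. A possible follow-up refactor (hypothetical helper, not part of this commit) that would leave only one copy for the formatter to maintain:

def extract_party_by_keyword(profile, party_keywords, default=""):
    """Return the first party keyword found in the profile text, else default.

    profile is a BeautifulSoup document, e.g. the return value of get_soup.
    """
    for keyword in party_keywords:
        if keyword in profile.text:
            return keyword
    return default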
15 changes: 12 additions & 3 deletions scrap/local_councils/jeolla.py
@@ -14,6 +14,7 @@
party_keywords = getPartyList()
party_keywords.append("무소속")

+
def scrap_154(
    url,
    cid,
@@ -337,6 +338,7 @@ def scrap_167(

    # return ret_local_councilors(cid, councilors)

+
def scrap_175(
    url,
    cid,
@@ -346,7 +348,9 @@
    browser = get_selenium(url)
    councilors: list[Councilor] = []
    for profileList in browser.find_elements(By.CSS_SELECTOR, "ul[id='councilList']"):
-        for profile in profileList.find_elements(By.CSS_SELECTOR, "ul[class='name_51']"):
+        for profile in profileList.find_elements(
+            By.CSS_SELECTOR, "ul[class='name_51']"
+        ):
            name_tag = profile.find_element(By.TAG_NAME, "li")
            name = name_tag.text.strip() if name_tag else "이름 정보 없음"

@@ -362,6 +366,7 @@ def scrap_175(

    return ret_local_councilors(cid, councilors)

+
def scrap_177(
    url,
    cid,
@@ -393,8 +398,12 @@ def scrap_178(
    """Wando-gun, Jeollanam-do"""
    browser = get_selenium(url)
    councilors: list[Councilor] = []
-    for profileList in browser.find_elements(By.CSS_SELECTOR, "div[class='congressperson_list']"):
-        for profile in profileList.find_elements(By.CSS_SELECTOR, "div[class='col-lg-6']"):
+    for profileList in browser.find_elements(
+        By.CSS_SELECTOR, "div[class='congressperson_list']"
+    ):
+        for profile in profileList.find_elements(
+            By.CSS_SELECTOR, "div[class='col-lg-6']"
+        ):
            name_tag = profile.find_element(By.TAG_NAME, "strong")
            name = name_tag.text.strip() if name_tag else "이름 정보 없음"
            profile_link = sel_find(profile, "a", class_="icon_btn")
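The wrapped Selenium calls above could also shrink below black's 88-column limit by shortening the selectors themselves. A sketch of the equivalent shorthand (not part of this commit; assumes the pages carry no other elements with these ids/classes):

from selenium.webdriver.common.by import By

def find_name_rows(browser):
    # browser: a WebDriver, e.g. the return value of get_selenium(url).
    # "ul[id='councilList']" == "ul#councilList"; "ul[class='name_51']" is
    # stricter than "ul.name_51" - the attribute form requires the class
    # attribute to equal "name_51" exactly, while .name_51 also matches
    # elements carrying extra classes, which is usually what a scraper wants.
    return browser.find_elements(By.CSS_SELECTOR, "ul#councilList ul.name_51")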
6 changes: 5 additions & 1 deletion scrap/utils/runner.py
@@ -106,8 +106,10 @@ def get_records_from_data_source(self, data_source: str):
    # Helper Functions
    def is_euc_kr(self, n: int) -> bool:
        return n in self.runner_args["euc_kr"]
+
    def inner_euckr(self, n: int) -> bool:
        return n in self.runner_args["inner_euckr"]
+
    def is_special_function(self, n: int) -> bool:
        return n in self.runner_args["special_functions"]

@@ -136,7 +138,9 @@ def run_single(self, cid: int) -> ScrapResult:
        if self.is_selenium_basic(cid):
            result = sel_scrap_basic(council_url, cid, council_args)
        else:
-            result = scrap_basic(council_url, cid, council_args, encoding, inner_euckr)
+            result = scrap_basic(
+                council_url, cid, council_args, encoding, inner_euckr
+            )

        return result

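The new inner_euckr helper mirrors is_euc_kr, and run_single forwards both values to scrap_basic; the lines that derive them fall outside this diff's context. Presumably the glue looks roughly like this (a sketch, not code from the repo):

# Inside the runner class, before the dispatch shown above:
def resolve_encoding_flags(self, cid: int) -> tuple[str, bool]:
    encoding = "euc-kr" if self.is_euc_kr(cid) else "utf-8"
    inner_euckr = self.inner_euckr(cid)  # profile sub-pages are EUC-KR
    return encoding, inner_euckr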
21 changes: 14 additions & 7 deletions scrap/utils/spreadsheet.py
@@ -116,6 +116,7 @@ def scrap_all_metro_councils() -> None:
    # )
    # email_result(emessages)

+
def scrap_all_local_councils() -> None:
    # TODO - detect charset=euc-kr etc. declared on the page and fetch with it directly.
    euc_kr = [
Expand All @@ -138,7 +139,7 @@ def scrap_all_local_councils() -> None:
202,
222,
]
inner_euckr=[200]
inner_euckr = [200]
special_functions = (
list(range(1, 57))
+ [62, 63, 64, 88, 97, 103, 107]
@@ -153,14 +154,18 @@ def scrap_all_local_councils() -> None:
            188,
            189,
            190,
-            191,192,
+            191,
+            192,
            194,
            195,
-            196,197,
+            196,
+            197,
            198,
            199,
-            201,202,
-            203,204,
+            201,
+            202,
+            203,
+            204,
            206,
            208,
            209,
@@ -225,7 +230,9 @@ def scrap_all_local_councils() -> None:
            result = str(sel_scrap_basic(council_url, n, council_args).councilors)
        else:
            result = str(
-                scrap_basic(council_url, n, council_args, encoding, inner_euckr).councilors
+                scrap_basic(
+                    council_url, n, council_args, encoding, inner_euckr
+                ).councilors
            )
        if "정보 없음" in result:
            emsg = "스크랩 결과에 '정보 없음'이 포함되어 있습니다. 일부 인명에\
@@ -241,7 +248,7 @@ def scrap_all_local_councils() -> None:
        except Exception as e:
            print(e)
            print(result)
-            # add_error(n, "기타 오류 - " + str(e))
+        # add_error(n, "기타 오류 - " + str(e))
    # emessages = (
    #     f"""
    # 총 실행 횟수: {N}
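The TODO at the top of scrap_all_local_councils wants the hand-maintained euc_kr and inner_euckr id lists replaced by detection of the charset the page itself declares. A minimal sketch of that detection, assuming the requests/BeautifulSoup stack already used elsewhere in the repo:

import requests
from bs4 import BeautifulSoup

def detect_encoding(url: str) -> str:
    """Return the charset declared by the page, falling back to a guess."""
    resp = requests.get(url, verify=False)  # verify=False mirrors get_soup's habit
    soup = BeautifulSoup(resp.content, "html.parser")
    meta = soup.find("meta", charset=True)  # <meta charset="euc-kr">
    if meta:
        return meta["charset"].lower()
    # Older pages: <meta http-equiv="Content-Type" content="text/html; charset=euc-kr">
    meta = soup.find("meta", attrs={"http-equiv": True})
    if meta and "charset=" in meta.get("content", "").lower():
        return meta["content"].lower().split("charset=")[-1].strip()
    return resp.apparent_encoding or "utf-8"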
