Skip to content

Commit

Permalink
Formatted with black
Browse files (browse the repository at this point in the history)
  • Loading branch information
Re-st authored and github-actions[bot] committed Nov 6, 2023
1 parent e852d45 commit 965779c
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 14 deletions.
4 changes: 2 additions & 2 deletions scrap/local_councils/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def get_name(profile, element, class_, wrapper_element, wrapper_class_):
# span 태그 안의 것들을 다 지움
for span in name_tag.find_all("span"):
span.decompose()
for a_tag in name_tag.find_all('a'): # 인천 서구 등. 안에 '개인홈페이지' 링크가 들음.
for a_tag in name_tag.find_all("a"): # 인천 서구 등. 안에 '개인홈페이지' 링크가 들음.
a_tag.extract()
name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"

Expand All @@ -75,7 +75,7 @@ def get_name(profile, element, class_, wrapper_element, wrapper_class_):
name = name.replace(keyword, "").strip()
break
maybe_name = name.split()[0] # 이름 뒤에 직책이 따라오는 경우
if len(maybe_name) == 1: # 외자 이름이 띄어쓰기 때문에 분리된 경우
if len(maybe_name) == 1: # 외자 이름이 띄어쓰기 때문에 분리된 경우
name = "".join(name.split()[0:2])
else:
name = maybe_name
Expand Down
1 change: 1 addition & 0 deletions scrap/local_councils/busan.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from urllib.parse import urlparse
from time import sleep


def scrap_26(
url="https://www.bsjunggu.go.kr/council/board/list.junggu?boardId=BBS_0000118&menuCd=DOM_000000503003000000&contentsSid=755&cpath=%2Fcouncil",
) -> ScrapResult:
Expand Down
12 changes: 8 additions & 4 deletions scrap/local_councils/gangwon.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
from scrap.utils.requests import get_soup
from scrap.local_councils.basic import *
from scrap.utils.utils import getPartyList

party_keywords = getPartyList()
party_keywords.append("무소속")


def scrap_107(
url="https://council.wonju.go.kr/content/member/memberName.html",
) -> ScrapResult:
Expand Down Expand Up @@ -39,10 +41,10 @@ def scrap_107(
name_tag = info.find_element(By.CSS_SELECTOR, "dd[class='name']")
name = name_tag.text.split("(")[0].strip() if name_tag else "이름 정보 없음"
if len(name) > 3:
# 수식어가 이름 앞이나 뒤에 붙어있는 경우
for keyword in ["부의장", "의원", "의장"]: # 119, 강서구 등
if keyword in name:
name = name.replace(keyword, "").strip()
# 수식어가 이름 앞이나 뒤에 붙어있는 경우
for keyword in ["부의장", "의원", "의장"]: # 119, 강서구 등
if keyword in name:
name = name.replace(keyword, "").strip()
party_tag = info.find_elements(By.TAG_NAME, "dd")
for tag in party_tag:
party = tag.text.split(" ")[-1]
Expand All @@ -58,6 +60,8 @@ def scrap_107(
council_type=CouncilType.LOCAL_COUNCIL,
councilors=councilors,
)


# 107: ScrapBasicArgument(
# pf_memlistelt="div",
# pf_memlistcls="content",
Expand Down
18 changes: 12 additions & 6 deletions scrap/local_councils/gwangju.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@

from scrap.utils.types import CouncilType, Councilor, ScrapResult
from scrap.utils.utils import getPartyList

party_keywords = getPartyList()
party_keywords.append("무소속")


def scrap_62(
url="http://www.gjnc.or.kr/main/contents/lawmakerDistrict",
) -> ScrapResult:
Expand Down Expand Up @@ -64,6 +66,7 @@ def scrap_62(
councilors=councilors,
)


def scrap_63(
url="https://council.bukgu.gwangju.kr/index.do?PID=024",
) -> ScrapResult:
Expand All @@ -89,7 +92,9 @@ def scrap_63(
councilor_infos = browser.find_elements(By.CSS_SELECTOR, "ul[class='info']")

for info in councilor_infos:
name_tag = info.find_element(By.CSS_SELECTOR, "li[class='name']").find_element(By.TAG_NAME, "h5")
name_tag = info.find_element(By.CSS_SELECTOR, "li[class='name']").find_element(
By.TAG_NAME, "h5"
)
name = name_tag.text.strip() if name_tag else "이름 정보 없음"
party_tag = info.find_elements(By.TAG_NAME, "dd")
for tag in party_tag:
Expand All @@ -107,6 +112,7 @@ def scrap_63(
councilors=councilors,
)


def scrap_64(
url="https://gjgc.or.kr/main/contents/lawmaker",
) -> ScrapResult:
Expand Down Expand Up @@ -135,10 +141,10 @@ def scrap_64(
name_tag = info.find_element(By.TAG_NAME, "strong")
name = name_tag.text.strip() if name_tag else "이름 정보 없음"
if len(name) > 3:
# 수식어가 이름 앞이나 뒤에 붙어있는 경우
for keyword in ["부의장", "의원", "의장"]: # 119, 강서구 등
if keyword in name:
name = name.replace(keyword, "").strip()
# 수식어가 이름 앞이나 뒤에 붙어있는 경우
for keyword in ["부의장", "의원", "의장"]: # 119, 강서구 등
if keyword in name:
name = name.replace(keyword, "").strip()
party_tag = info.find_elements(By.TAG_NAME, "dd")
for tag in party_tag:
party = tag.text.replace(" ", "")
Expand All @@ -153,4 +159,4 @@ def scrap_64(
council_id="64",
council_type=CouncilType.LOCAL_COUNCIL,
councilors=councilors,
)
)
6 changes: 4 additions & 2 deletions scrap/utils/spreadsheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,8 +621,10 @@ def main() -> None:
N = 226
for n in range(57, 113):
if n in no_information:
print(f"| {n} | 오류: 지난번 확인 시, 정당 정보 등이 홈페이지에 없었습니다."\
"다시 확인해보시겠어요? 링크 : ", data[n - 1]["URL"])
print(
f"| {n} | 오류: 지난번 확인 시, 정당 정보 등이 홈페이지에 없었습니다." "다시 확인해보시겠어요? 링크 : ",
data[n - 1]["URL"],
)
errors.append(n)
continue
encoding = "euc-kr" if n in euc_kr else "utf-8"
Expand Down

0 comments on commit 965779c

Please sign in to comment.