From 965779c1b77459232b85d290cd4d01fa8d862c48 Mon Sep 17 00:00:00 2001 From: Re-st Date: Mon, 6 Nov 2023 15:32:20 +0000 Subject: [PATCH] Formatted with black --- scrap/local_councils/basic.py | 4 ++-- scrap/local_councils/busan.py | 1 + scrap/local_councils/gangwon.py | 12 ++++++++---- scrap/local_councils/gwangju.py | 18 ++++++++++++------ scrap/utils/spreadsheet.py | 6 ++++-- 5 files changed, 27 insertions(+), 14 deletions(-) diff --git a/scrap/local_councils/basic.py b/scrap/local_councils/basic.py index 413f327..b71e162 100644 --- a/scrap/local_councils/basic.py +++ b/scrap/local_councils/basic.py @@ -56,7 +56,7 @@ def get_name(profile, element, class_, wrapper_element, wrapper_class_): # span 태그 안의 것들을 다 지움 for span in name_tag.find_all("span"): span.decompose() - for a_tag in name_tag.find_all('a'): # 인천 서구 등. 안에 '개인홈페이지' 링크가 들음. + for a_tag in name_tag.find_all("a"): # 인천 서구 등. 안에 '개인홈페이지' 링크가 들음. a_tag.extract() name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" @@ -75,7 +75,7 @@ def get_name(profile, element, class_, wrapper_element, wrapper_class_): name = name.replace(keyword, "").strip() break maybe_name = name.split()[0] # 이름 뒤에 직책이 따라오는 경우 - if len(maybe_name) == 1: # 외자 이름이 띄어쓰기 때문에 분리된 경우 + if len(maybe_name) == 1: # 외자 이름이 띄어쓰기 때문에 분리된 경우 name = "".join(name.split()[0:2]) else: name = maybe_name diff --git a/scrap/local_councils/busan.py b/scrap/local_councils/busan.py index f19c2b2..fa1120b 100644 --- a/scrap/local_councils/busan.py +++ b/scrap/local_councils/busan.py @@ -9,6 +9,7 @@ from urllib.parse import urlparse from time import sleep + def scrap_26( url="https://www.bsjunggu.go.kr/council/board/list.junggu?boardId=BBS_0000118&menuCd=DOM_000000503003000000&contentsSid=755&cpath=%2Fcouncil", ) -> ScrapResult: diff --git a/scrap/local_councils/gangwon.py b/scrap/local_councils/gangwon.py index 40be1b6..326920f 100644 --- a/scrap/local_councils/gangwon.py +++ b/scrap/local_councils/gangwon.py @@ -8,9 +8,11 @@ from scrap.utils.requests import get_soup from scrap.local_councils.basic import * from scrap.utils.utils import getPartyList + party_keywords = getPartyList() party_keywords.append("무소속") + def scrap_107( url="https://council.wonju.go.kr/content/member/memberName.html", ) -> ScrapResult: @@ -39,10 +41,10 @@ def scrap_107( name_tag = info.find_element(By.CSS_SELECTOR, "dd[class='name']") name = name_tag.text.split("(")[0].strip() if name_tag else "이름 정보 없음" if len(name) > 3: - # 수식어가 이름 앞이나 뒤에 붙어있는 경우 - for keyword in ["부의장", "의원", "의장"]: # 119, 강서구 등 - if keyword in name: - name = name.replace(keyword, "").strip() + # 수식어가 이름 앞이나 뒤에 붙어있는 경우 + for keyword in ["부의장", "의원", "의장"]: # 119, 강서구 등 + if keyword in name: + name = name.replace(keyword, "").strip() party_tag = info.find_elements(By.TAG_NAME, "dd") for tag in party_tag: party = tag.text.split(" ")[-1] @@ -58,6 +60,8 @@ def scrap_107( council_type=CouncilType.LOCAL_COUNCIL, councilors=councilors, ) + + # 107: ScrapBasicArgument( # pf_memlistelt="div", # pf_memlistcls="content", diff --git a/scrap/local_councils/gwangju.py b/scrap/local_councils/gwangju.py index 28a8f48..b0c635c 100644 --- a/scrap/local_councils/gwangju.py +++ b/scrap/local_councils/gwangju.py @@ -8,9 +8,11 @@ from scrap.utils.types import CouncilType, Councilor, ScrapResult from scrap.utils.utils import getPartyList + party_keywords = getPartyList() party_keywords.append("무소속") + def scrap_62( url="http://www.gjnc.or.kr/main/contents/lawmakerDistrict", ) -> ScrapResult: @@ -64,6 +66,7 @@ def scrap_62( councilors=councilors, ) + def scrap_63( url="https://council.bukgu.gwangju.kr/index.do?PID=024", ) -> ScrapResult: @@ -89,7 +92,9 @@ def scrap_63( councilor_infos = browser.find_elements(By.CSS_SELECTOR, "ul[class='info']") for info in councilor_infos: - name_tag = info.find_element(By.CSS_SELECTOR, "li[class='name']").find_element(By.TAG_NAME, "h5") + name_tag = info.find_element(By.CSS_SELECTOR, "li[class='name']").find_element( + By.TAG_NAME, "h5" + ) name = name_tag.text.strip() if name_tag else "이름 정보 없음" party_tag = info.find_elements(By.TAG_NAME, "dd") for tag in party_tag: @@ -107,6 +112,7 @@ def scrap_63( councilors=councilors, ) + def scrap_64( url="https://gjgc.or.kr/main/contents/lawmaker", ) -> ScrapResult: @@ -135,10 +141,10 @@ def scrap_64( name_tag = info.find_element(By.TAG_NAME, "strong") name = name_tag.text.strip() if name_tag else "이름 정보 없음" if len(name) > 3: - # 수식어가 이름 앞이나 뒤에 붙어있는 경우 - for keyword in ["부의장", "의원", "의장"]: # 119, 강서구 등 - if keyword in name: - name = name.replace(keyword, "").strip() + # 수식어가 이름 앞이나 뒤에 붙어있는 경우 + for keyword in ["부의장", "의원", "의장"]: # 119, 강서구 등 + if keyword in name: + name = name.replace(keyword, "").strip() party_tag = info.find_elements(By.TAG_NAME, "dd") for tag in party_tag: party = tag.text.replace(" ", "") @@ -153,4 +159,4 @@ def scrap_64( council_id="64", council_type=CouncilType.LOCAL_COUNCIL, councilors=councilors, - ) \ No newline at end of file + ) diff --git a/scrap/utils/spreadsheet.py b/scrap/utils/spreadsheet.py index 12ee93d..602cb13 100644 --- a/scrap/utils/spreadsheet.py +++ b/scrap/utils/spreadsheet.py @@ -621,8 +621,10 @@ def main() -> None: N = 226 for n in range(57, 113): if n in no_information: - print(f"| {n} | 오류: 지난번 확인 시, 정당 정보 등이 홈페이지에 없었습니다."\ - "다시 확인해보시겠어요? 링크 : ", data[n - 1]["URL"]) + print( + f"| {n} | 오류: 지난번 확인 시, 정당 정보 등이 홈페이지에 없었습니다." "다시 확인해보시겠어요? 링크 : ", + data[n - 1]["URL"], + ) errors.append(n) continue encoding = "euc-kr" if n in euc_kr else "utf-8"